{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999955847745365, "eval_steps": 500, "global_step": 28311, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.532180370790634e-05, "grad_norm": 23.15082550048828, "learning_rate": 1.176470588235294e-08, "loss": 2.0769, "step": 1 }, { "epoch": 7.064360741581268e-05, "grad_norm": 25.97966766357422, "learning_rate": 2.352941176470588e-08, "loss": 2.3071, "step": 2 }, { "epoch": 0.00010596541112371903, "grad_norm": 54.74532699584961, "learning_rate": 3.529411764705883e-08, "loss": 2.26, "step": 3 }, { "epoch": 0.00014128721483162537, "grad_norm": 26.32728385925293, "learning_rate": 4.705882352941176e-08, "loss": 2.2633, "step": 4 }, { "epoch": 0.00017660901853953172, "grad_norm": 22.4090633392334, "learning_rate": 5.882352941176471e-08, "loss": 2.0785, "step": 5 }, { "epoch": 0.00021193082224743806, "grad_norm": 24.599184036254883, "learning_rate": 7.058823529411766e-08, "loss": 2.1726, "step": 6 }, { "epoch": 0.0002472526259553444, "grad_norm": 20.615018844604492, "learning_rate": 8.23529411764706e-08, "loss": 2.0311, "step": 7 }, { "epoch": 0.00028257442966325073, "grad_norm": 25.4075870513916, "learning_rate": 9.411764705882353e-08, "loss": 2.2629, "step": 8 }, { "epoch": 0.0003178962333711571, "grad_norm": 31.685449600219727, "learning_rate": 1.0588235294117648e-07, "loss": 2.1134, "step": 9 }, { "epoch": 0.00035321803707906343, "grad_norm": 23.76656723022461, "learning_rate": 1.1764705882352942e-07, "loss": 2.1808, "step": 10 }, { "epoch": 0.0003885398407869698, "grad_norm": 23.398433685302734, "learning_rate": 1.2941176470588236e-07, "loss": 2.1298, "step": 11 }, { "epoch": 0.00042386164449487613, "grad_norm": 21.550745010375977, "learning_rate": 1.4117647058823532e-07, "loss": 2.1267, "step": 12 }, { "epoch": 0.00045918344820278245, "grad_norm": 25.39097785949707, "learning_rate": 1.5294117647058826e-07, "loss": 2.2461, "step": 13 }, { "epoch": 0.0004945052519106888, "grad_norm": 26.43467140197754, "learning_rate": 1.647058823529412e-07, "loss": 2.3165, "step": 14 }, { "epoch": 0.0005298270556185952, "grad_norm": 23.53121566772461, "learning_rate": 1.7647058823529414e-07, "loss": 2.1297, "step": 15 }, { "epoch": 0.0005651488593265015, "grad_norm": 24.044174194335938, "learning_rate": 1.8823529411764705e-07, "loss": 2.1125, "step": 16 }, { "epoch": 0.0006004706630344078, "grad_norm": 22.629589080810547, "learning_rate": 2.0000000000000002e-07, "loss": 2.084, "step": 17 }, { "epoch": 0.0006357924667423142, "grad_norm": 22.788454055786133, "learning_rate": 2.1176470588235296e-07, "loss": 2.1288, "step": 18 }, { "epoch": 0.0006711142704502206, "grad_norm": 27.590309143066406, "learning_rate": 2.2352941176470592e-07, "loss": 2.2311, "step": 19 }, { "epoch": 0.0007064360741581269, "grad_norm": 24.901334762573242, "learning_rate": 2.3529411764705883e-07, "loss": 2.0927, "step": 20 }, { "epoch": 0.0007417578778660332, "grad_norm": 22.417926788330078, "learning_rate": 2.4705882352941175e-07, "loss": 2.0778, "step": 21 }, { "epoch": 0.0007770796815739396, "grad_norm": 23.53537940979004, "learning_rate": 2.588235294117647e-07, "loss": 2.0747, "step": 22 }, { "epoch": 0.0008124014852818459, "grad_norm": 22.87490463256836, "learning_rate": 2.705882352941177e-07, "loss": 1.9935, "step": 23 }, { "epoch": 0.0008477232889897523, "grad_norm": 22.251205444335938, "learning_rate": 2.8235294117647064e-07, "loss": 2.0213, "step": 24 }, { "epoch": 0.0008830450926976586, "grad_norm": 22.674468994140625, "learning_rate": 2.9411764705882356e-07, "loss": 2.0206, "step": 25 }, { "epoch": 0.0009183668964055649, "grad_norm": 18.661380767822266, "learning_rate": 3.058823529411765e-07, "loss": 1.8984, "step": 26 }, { "epoch": 0.0009536887001134713, "grad_norm": 21.701141357421875, "learning_rate": 3.176470588235294e-07, "loss": 1.8694, "step": 27 }, { "epoch": 0.0009890105038213776, "grad_norm": 20.639493942260742, "learning_rate": 3.294117647058824e-07, "loss": 1.8344, "step": 28 }, { "epoch": 0.001024332307529284, "grad_norm": 16.69619369506836, "learning_rate": 3.411764705882353e-07, "loss": 1.8048, "step": 29 }, { "epoch": 0.0010596541112371904, "grad_norm": 17.718008041381836, "learning_rate": 3.529411764705883e-07, "loss": 1.8434, "step": 30 }, { "epoch": 0.0010949759149450967, "grad_norm": 18.549636840820312, "learning_rate": 3.647058823529412e-07, "loss": 1.8546, "step": 31 }, { "epoch": 0.001130297718653003, "grad_norm": 15.718324661254883, "learning_rate": 3.764705882352941e-07, "loss": 1.7825, "step": 32 }, { "epoch": 0.0011656195223609094, "grad_norm": 15.922802925109863, "learning_rate": 3.882352941176471e-07, "loss": 1.7683, "step": 33 }, { "epoch": 0.0012009413260688157, "grad_norm": 14.466650009155273, "learning_rate": 4.0000000000000003e-07, "loss": 1.738, "step": 34 }, { "epoch": 0.001236263129776722, "grad_norm": 17.803752899169922, "learning_rate": 4.1176470588235295e-07, "loss": 1.7066, "step": 35 }, { "epoch": 0.0012715849334846284, "grad_norm": 13.215091705322266, "learning_rate": 4.235294117647059e-07, "loss": 1.6832, "step": 36 }, { "epoch": 0.0013069067371925347, "grad_norm": 15.53039264678955, "learning_rate": 4.352941176470588e-07, "loss": 1.619, "step": 37 }, { "epoch": 0.0013422285409004412, "grad_norm": 14.240071296691895, "learning_rate": 4.4705882352941184e-07, "loss": 1.6304, "step": 38 }, { "epoch": 0.0013775503446083475, "grad_norm": 11.761480331420898, "learning_rate": 4.5882352941176476e-07, "loss": 0.7466, "step": 39 }, { "epoch": 0.0014128721483162537, "grad_norm": 12.062878608703613, "learning_rate": 4.7058823529411767e-07, "loss": 1.6311, "step": 40 }, { "epoch": 0.0014481939520241602, "grad_norm": 13.43281078338623, "learning_rate": 4.823529411764706e-07, "loss": 1.608, "step": 41 }, { "epoch": 0.0014835157557320665, "grad_norm": 12.768863677978516, "learning_rate": 4.941176470588235e-07, "loss": 1.574, "step": 42 }, { "epoch": 0.0015188375594399727, "grad_norm": 11.40450382232666, "learning_rate": 5.058823529411766e-07, "loss": 1.5091, "step": 43 }, { "epoch": 0.0015541593631478792, "grad_norm": 9.406295776367188, "learning_rate": 5.176470588235294e-07, "loss": 1.5051, "step": 44 }, { "epoch": 0.0015894811668557855, "grad_norm": 8.55344009399414, "learning_rate": 5.294117647058824e-07, "loss": 1.5221, "step": 45 }, { "epoch": 0.0016248029705636918, "grad_norm": 7.164737701416016, "learning_rate": 5.411764705882354e-07, "loss": 1.5095, "step": 46 }, { "epoch": 0.0016601247742715982, "grad_norm": 7.229923248291016, "learning_rate": 5.529411764705883e-07, "loss": 1.519, "step": 47 }, { "epoch": 0.0016954465779795045, "grad_norm": 5.485545635223389, "learning_rate": 5.647058823529413e-07, "loss": 1.4377, "step": 48 }, { "epoch": 0.0017307683816874108, "grad_norm": 5.96727991104126, "learning_rate": 5.764705882352941e-07, "loss": 1.4965, "step": 49 }, { "epoch": 0.0017660901853953173, "grad_norm": 5.190463542938232, "learning_rate": 5.882352941176471e-07, "loss": 1.4321, "step": 50 }, { "epoch": 0.0018014119891032235, "grad_norm": 5.00197172164917, "learning_rate": 6.000000000000001e-07, "loss": 1.4199, "step": 51 }, { "epoch": 0.0018367337928111298, "grad_norm": 4.8883819580078125, "learning_rate": 6.11764705882353e-07, "loss": 1.4651, "step": 52 }, { "epoch": 0.0018720555965190363, "grad_norm": 5.251221656799316, "learning_rate": 6.235294117647059e-07, "loss": 1.3894, "step": 53 }, { "epoch": 0.0019073774002269425, "grad_norm": 4.8882832527160645, "learning_rate": 6.352941176470588e-07, "loss": 1.3945, "step": 54 }, { "epoch": 0.001942699203934849, "grad_norm": 3.6660454273223877, "learning_rate": 6.470588235294118e-07, "loss": 1.3751, "step": 55 }, { "epoch": 0.0019780210076427553, "grad_norm": 4.555511474609375, "learning_rate": 6.588235294117648e-07, "loss": 1.3811, "step": 56 }, { "epoch": 0.0020133428113506616, "grad_norm": 3.437067985534668, "learning_rate": 6.705882352941178e-07, "loss": 1.3431, "step": 57 }, { "epoch": 0.002048664615058568, "grad_norm": 4.443134784698486, "learning_rate": 6.823529411764706e-07, "loss": 1.3421, "step": 58 }, { "epoch": 0.002083986418766474, "grad_norm": 3.2147815227508545, "learning_rate": 6.941176470588236e-07, "loss": 1.3468, "step": 59 }, { "epoch": 0.002119308222474381, "grad_norm": 3.64875864982605, "learning_rate": 7.058823529411766e-07, "loss": 1.3674, "step": 60 }, { "epoch": 0.002154630026182287, "grad_norm": 4.147745609283447, "learning_rate": 7.176470588235294e-07, "loss": 1.3701, "step": 61 }, { "epoch": 0.0021899518298901933, "grad_norm": 3.0543735027313232, "learning_rate": 7.294117647058824e-07, "loss": 1.3707, "step": 62 }, { "epoch": 0.0022252736335980996, "grad_norm": 3.3600568771362305, "learning_rate": 7.411764705882353e-07, "loss": 1.4266, "step": 63 }, { "epoch": 0.002260595437306006, "grad_norm": 3.1660494804382324, "learning_rate": 7.529411764705882e-07, "loss": 1.36, "step": 64 }, { "epoch": 0.0022959172410139126, "grad_norm": 2.763989210128784, "learning_rate": 7.647058823529413e-07, "loss": 1.3656, "step": 65 }, { "epoch": 0.002331239044721819, "grad_norm": 3.7927870750427246, "learning_rate": 7.764705882352942e-07, "loss": 1.3476, "step": 66 }, { "epoch": 0.002366560848429725, "grad_norm": 3.714984178543091, "learning_rate": 7.882352941176471e-07, "loss": 1.3572, "step": 67 }, { "epoch": 0.0024018826521376314, "grad_norm": 3.1687562465667725, "learning_rate": 8.000000000000001e-07, "loss": 1.3325, "step": 68 }, { "epoch": 0.0024372044558455376, "grad_norm": 5.115025997161865, "learning_rate": 8.11764705882353e-07, "loss": 1.319, "step": 69 }, { "epoch": 0.002472526259553444, "grad_norm": 2.713385820388794, "learning_rate": 8.235294117647059e-07, "loss": 1.3152, "step": 70 }, { "epoch": 0.0025078480632613506, "grad_norm": 2.8473446369171143, "learning_rate": 8.352941176470589e-07, "loss": 1.3017, "step": 71 }, { "epoch": 0.002543169866969257, "grad_norm": 2.694237232208252, "learning_rate": 8.470588235294118e-07, "loss": 1.3065, "step": 72 }, { "epoch": 0.002578491670677163, "grad_norm": 2.9111084938049316, "learning_rate": 8.588235294117647e-07, "loss": 1.2756, "step": 73 }, { "epoch": 0.0026138134743850694, "grad_norm": 2.6875123977661133, "learning_rate": 8.705882352941177e-07, "loss": 1.2883, "step": 74 }, { "epoch": 0.0026491352780929757, "grad_norm": 2.9322800636291504, "learning_rate": 8.823529411764707e-07, "loss": 1.3112, "step": 75 }, { "epoch": 0.0026844570818008824, "grad_norm": 2.512535810470581, "learning_rate": 8.941176470588237e-07, "loss": 1.2944, "step": 76 }, { "epoch": 0.0027197788855087886, "grad_norm": 2.676769495010376, "learning_rate": 9.058823529411765e-07, "loss": 1.3252, "step": 77 }, { "epoch": 0.002755100689216695, "grad_norm": 2.7886526584625244, "learning_rate": 9.176470588235295e-07, "loss": 1.325, "step": 78 }, { "epoch": 0.002790422492924601, "grad_norm": 2.5460731983184814, "learning_rate": 9.294117647058825e-07, "loss": 1.3073, "step": 79 }, { "epoch": 0.0028257442966325074, "grad_norm": 3.356539011001587, "learning_rate": 9.411764705882353e-07, "loss": 1.3156, "step": 80 }, { "epoch": 0.0028610661003404137, "grad_norm": 2.5881128311157227, "learning_rate": 9.529411764705883e-07, "loss": 1.2832, "step": 81 }, { "epoch": 0.0028963879040483204, "grad_norm": 3.723356008529663, "learning_rate": 9.647058823529413e-07, "loss": 1.3063, "step": 82 }, { "epoch": 0.0029317097077562267, "grad_norm": 2.4947762489318848, "learning_rate": 9.764705882352942e-07, "loss": 1.2909, "step": 83 }, { "epoch": 0.002967031511464133, "grad_norm": 2.6063435077667236, "learning_rate": 9.88235294117647e-07, "loss": 1.2986, "step": 84 }, { "epoch": 0.003002353315172039, "grad_norm": 2.6957151889801025, "learning_rate": 1.0000000000000002e-06, "loss": 1.287, "step": 85 }, { "epoch": 0.0030376751188799455, "grad_norm": 2.9856972694396973, "learning_rate": 1.0117647058823531e-06, "loss": 1.284, "step": 86 }, { "epoch": 0.0030729969225878517, "grad_norm": 2.647916078567505, "learning_rate": 1.023529411764706e-06, "loss": 1.2754, "step": 87 }, { "epoch": 0.0031083187262957584, "grad_norm": 2.782546281814575, "learning_rate": 1.0352941176470589e-06, "loss": 1.3065, "step": 88 }, { "epoch": 0.0031436405300036647, "grad_norm": 2.6871581077575684, "learning_rate": 1.0470588235294118e-06, "loss": 1.2802, "step": 89 }, { "epoch": 0.003178962333711571, "grad_norm": 2.633903741836548, "learning_rate": 1.0588235294117648e-06, "loss": 1.2591, "step": 90 }, { "epoch": 0.0032142841374194773, "grad_norm": 2.3300681114196777, "learning_rate": 1.0705882352941177e-06, "loss": 1.2315, "step": 91 }, { "epoch": 0.0032496059411273835, "grad_norm": 3.117643117904663, "learning_rate": 1.0823529411764707e-06, "loss": 1.2367, "step": 92 }, { "epoch": 0.0032849277448352902, "grad_norm": 2.4106857776641846, "learning_rate": 1.0941176470588235e-06, "loss": 1.2239, "step": 93 }, { "epoch": 0.0033202495485431965, "grad_norm": 2.530299425125122, "learning_rate": 1.1058823529411766e-06, "loss": 1.2182, "step": 94 }, { "epoch": 0.0033555713522511028, "grad_norm": 2.5051488876342773, "learning_rate": 1.1176470588235296e-06, "loss": 1.2527, "step": 95 }, { "epoch": 0.003390893155959009, "grad_norm": 2.6833531856536865, "learning_rate": 1.1294117647058826e-06, "loss": 1.2776, "step": 96 }, { "epoch": 0.0034262149596669153, "grad_norm": 3.3134219646453857, "learning_rate": 1.1411764705882353e-06, "loss": 1.2486, "step": 97 }, { "epoch": 0.0034615367633748216, "grad_norm": 2.5807814598083496, "learning_rate": 1.1529411764705883e-06, "loss": 1.2926, "step": 98 }, { "epoch": 0.0034968585670827283, "grad_norm": 2.5620322227478027, "learning_rate": 1.1647058823529413e-06, "loss": 1.2701, "step": 99 }, { "epoch": 0.0035321803707906345, "grad_norm": 2.5785834789276123, "learning_rate": 1.1764705882352942e-06, "loss": 1.2379, "step": 100 }, { "epoch": 0.003567502174498541, "grad_norm": 2.479386329650879, "learning_rate": 1.1882352941176472e-06, "loss": 1.2299, "step": 101 }, { "epoch": 0.003602823978206447, "grad_norm": 2.5526175498962402, "learning_rate": 1.2000000000000002e-06, "loss": 1.2554, "step": 102 }, { "epoch": 0.0036381457819143533, "grad_norm": 2.4943504333496094, "learning_rate": 1.211764705882353e-06, "loss": 1.2478, "step": 103 }, { "epoch": 0.0036734675856222596, "grad_norm": 2.6309638023376465, "learning_rate": 1.223529411764706e-06, "loss": 1.282, "step": 104 }, { "epoch": 0.0037087893893301663, "grad_norm": 2.7967281341552734, "learning_rate": 1.235294117647059e-06, "loss": 1.2494, "step": 105 }, { "epoch": 0.0037441111930380726, "grad_norm": 2.442413568496704, "learning_rate": 1.2470588235294118e-06, "loss": 1.2551, "step": 106 }, { "epoch": 0.003779432996745979, "grad_norm": 2.687411308288574, "learning_rate": 1.2588235294117648e-06, "loss": 1.2232, "step": 107 }, { "epoch": 0.003814754800453885, "grad_norm": 2.694110631942749, "learning_rate": 1.2705882352941175e-06, "loss": 1.2391, "step": 108 }, { "epoch": 0.0038500766041617914, "grad_norm": 2.8049755096435547, "learning_rate": 1.2823529411764707e-06, "loss": 1.2225, "step": 109 }, { "epoch": 0.003885398407869698, "grad_norm": 2.449047327041626, "learning_rate": 1.2941176470588237e-06, "loss": 1.2013, "step": 110 }, { "epoch": 0.003920720211577604, "grad_norm": 2.5729479789733887, "learning_rate": 1.3058823529411766e-06, "loss": 1.2409, "step": 111 }, { "epoch": 0.003956042015285511, "grad_norm": 3.0559933185577393, "learning_rate": 1.3176470588235296e-06, "loss": 1.2512, "step": 112 }, { "epoch": 0.003991363818993417, "grad_norm": 2.672880172729492, "learning_rate": 1.3294117647058824e-06, "loss": 1.2451, "step": 113 }, { "epoch": 0.004026685622701323, "grad_norm": 2.4905519485473633, "learning_rate": 1.3411764705882355e-06, "loss": 1.2308, "step": 114 }, { "epoch": 0.004062007426409229, "grad_norm": 2.6998202800750732, "learning_rate": 1.3529411764705883e-06, "loss": 1.2183, "step": 115 }, { "epoch": 0.004097329230117136, "grad_norm": 2.283674716949463, "learning_rate": 1.3647058823529413e-06, "loss": 1.2463, "step": 116 }, { "epoch": 0.004132651033825042, "grad_norm": 7.531800746917725, "learning_rate": 1.3764705882352942e-06, "loss": 1.198, "step": 117 }, { "epoch": 0.004167972837532948, "grad_norm": 2.300344944000244, "learning_rate": 1.3882352941176472e-06, "loss": 1.2095, "step": 118 }, { "epoch": 0.004203294641240855, "grad_norm": 2.9549763202667236, "learning_rate": 1.4000000000000001e-06, "loss": 1.1774, "step": 119 }, { "epoch": 0.004238616444948762, "grad_norm": 2.3900609016418457, "learning_rate": 1.4117647058823531e-06, "loss": 1.1955, "step": 120 }, { "epoch": 0.004273938248656668, "grad_norm": 2.2775721549987793, "learning_rate": 1.423529411764706e-06, "loss": 1.2242, "step": 121 }, { "epoch": 0.004309260052364574, "grad_norm": 2.258622646331787, "learning_rate": 1.4352941176470588e-06, "loss": 1.2088, "step": 122 }, { "epoch": 0.00434458185607248, "grad_norm": 2.352567195892334, "learning_rate": 1.447058823529412e-06, "loss": 1.2367, "step": 123 }, { "epoch": 0.004379903659780387, "grad_norm": 2.609729290008545, "learning_rate": 1.4588235294117648e-06, "loss": 1.2001, "step": 124 }, { "epoch": 0.004415225463488293, "grad_norm": 2.577096700668335, "learning_rate": 1.4705882352941177e-06, "loss": 1.2055, "step": 125 }, { "epoch": 0.004450547267196199, "grad_norm": 2.453108787536621, "learning_rate": 1.4823529411764707e-06, "loss": 1.219, "step": 126 }, { "epoch": 0.0044858690709041055, "grad_norm": 2.6247191429138184, "learning_rate": 1.4941176470588237e-06, "loss": 1.2033, "step": 127 }, { "epoch": 0.004521190874612012, "grad_norm": 2.7486698627471924, "learning_rate": 1.5058823529411764e-06, "loss": 1.221, "step": 128 }, { "epoch": 0.004556512678319918, "grad_norm": 2.586102247238159, "learning_rate": 1.5176470588235296e-06, "loss": 1.2165, "step": 129 }, { "epoch": 0.004591834482027825, "grad_norm": 2.9754931926727295, "learning_rate": 1.5294117647058826e-06, "loss": 1.225, "step": 130 }, { "epoch": 0.004627156285735731, "grad_norm": 2.885432004928589, "learning_rate": 1.5411764705882353e-06, "loss": 1.2171, "step": 131 }, { "epoch": 0.004662478089443638, "grad_norm": 2.449453115463257, "learning_rate": 1.5529411764705885e-06, "loss": 1.2249, "step": 132 }, { "epoch": 0.004697799893151544, "grad_norm": 2.3213951587677, "learning_rate": 1.5647058823529412e-06, "loss": 1.1962, "step": 133 }, { "epoch": 0.00473312169685945, "grad_norm": 2.466780185699463, "learning_rate": 1.5764705882352942e-06, "loss": 1.1974, "step": 134 }, { "epoch": 0.0047684435005673565, "grad_norm": 2.3215417861938477, "learning_rate": 1.5882352941176472e-06, "loss": 1.1888, "step": 135 }, { "epoch": 0.004803765304275263, "grad_norm": 2.4232325553894043, "learning_rate": 1.6000000000000001e-06, "loss": 1.2027, "step": 136 }, { "epoch": 0.004839087107983169, "grad_norm": 2.6028223037719727, "learning_rate": 1.6117647058823529e-06, "loss": 1.1835, "step": 137 }, { "epoch": 0.004874408911691075, "grad_norm": 2.303213357925415, "learning_rate": 1.623529411764706e-06, "loss": 1.2353, "step": 138 }, { "epoch": 0.0049097307153989815, "grad_norm": 2.5022809505462646, "learning_rate": 1.635294117647059e-06, "loss": 1.2147, "step": 139 }, { "epoch": 0.004945052519106888, "grad_norm": 2.780975341796875, "learning_rate": 1.6470588235294118e-06, "loss": 1.2275, "step": 140 }, { "epoch": 0.004980374322814795, "grad_norm": 2.3591465950012207, "learning_rate": 1.658823529411765e-06, "loss": 1.1881, "step": 141 }, { "epoch": 0.005015696126522701, "grad_norm": 2.325483560562134, "learning_rate": 1.6705882352941177e-06, "loss": 1.2075, "step": 142 }, { "epoch": 0.0050510179302306075, "grad_norm": 2.4584434032440186, "learning_rate": 1.682352941176471e-06, "loss": 1.19, "step": 143 }, { "epoch": 0.005086339733938514, "grad_norm": 2.6740782260894775, "learning_rate": 1.6941176470588237e-06, "loss": 1.2219, "step": 144 }, { "epoch": 0.00512166153764642, "grad_norm": 2.82415509223938, "learning_rate": 1.7058823529411766e-06, "loss": 0.6011, "step": 145 }, { "epoch": 0.005156983341354326, "grad_norm": 2.2542624473571777, "learning_rate": 1.7176470588235294e-06, "loss": 1.177, "step": 146 }, { "epoch": 0.0051923051450622325, "grad_norm": 2.3872885704040527, "learning_rate": 1.7294117647058825e-06, "loss": 1.1638, "step": 147 }, { "epoch": 0.005227626948770139, "grad_norm": 2.213329315185547, "learning_rate": 1.7411764705882353e-06, "loss": 1.1762, "step": 148 }, { "epoch": 0.005262948752478045, "grad_norm": 2.316561698913574, "learning_rate": 1.7529411764705883e-06, "loss": 1.1492, "step": 149 }, { "epoch": 0.005298270556185951, "grad_norm": 2.528315305709839, "learning_rate": 1.7647058823529414e-06, "loss": 1.1819, "step": 150 }, { "epoch": 0.005333592359893858, "grad_norm": 2.5782065391540527, "learning_rate": 1.7764705882352942e-06, "loss": 1.1741, "step": 151 }, { "epoch": 0.005368914163601765, "grad_norm": 2.3220808506011963, "learning_rate": 1.7882352941176474e-06, "loss": 1.1673, "step": 152 }, { "epoch": 0.005404235967309671, "grad_norm": 2.3463494777679443, "learning_rate": 1.8000000000000001e-06, "loss": 1.1793, "step": 153 }, { "epoch": 0.005439557771017577, "grad_norm": 2.637049913406372, "learning_rate": 1.811764705882353e-06, "loss": 1.1632, "step": 154 }, { "epoch": 0.0054748795747254836, "grad_norm": 2.3154406547546387, "learning_rate": 1.8235294117647058e-06, "loss": 1.1368, "step": 155 }, { "epoch": 0.00551020137843339, "grad_norm": 2.3247036933898926, "learning_rate": 1.835294117647059e-06, "loss": 1.1612, "step": 156 }, { "epoch": 0.005545523182141296, "grad_norm": 2.8977444171905518, "learning_rate": 1.8470588235294118e-06, "loss": 1.1359, "step": 157 }, { "epoch": 0.005580844985849202, "grad_norm": 2.298610210418701, "learning_rate": 1.858823529411765e-06, "loss": 1.1594, "step": 158 }, { "epoch": 0.005616166789557109, "grad_norm": 2.510413885116577, "learning_rate": 1.870588235294118e-06, "loss": 1.2044, "step": 159 }, { "epoch": 0.005651488593265015, "grad_norm": 2.6811790466308594, "learning_rate": 1.8823529411764707e-06, "loss": 1.1981, "step": 160 }, { "epoch": 0.005686810396972921, "grad_norm": 2.2536849975585938, "learning_rate": 1.8941176470588239e-06, "loss": 1.1654, "step": 161 }, { "epoch": 0.005722132200680827, "grad_norm": 2.797267436981201, "learning_rate": 1.9058823529411766e-06, "loss": 1.1595, "step": 162 }, { "epoch": 0.005757454004388734, "grad_norm": 2.342047691345215, "learning_rate": 1.9176470588235298e-06, "loss": 1.1467, "step": 163 }, { "epoch": 0.005792775808096641, "grad_norm": 2.6271889209747314, "learning_rate": 1.9294117647058825e-06, "loss": 1.1844, "step": 164 }, { "epoch": 0.005828097611804547, "grad_norm": 2.6657779216766357, "learning_rate": 1.9411764705882353e-06, "loss": 1.1787, "step": 165 }, { "epoch": 0.005863419415512453, "grad_norm": 2.3528318405151367, "learning_rate": 1.9529411764705885e-06, "loss": 1.1675, "step": 166 }, { "epoch": 0.00589874121922036, "grad_norm": 2.8759348392486572, "learning_rate": 1.9647058823529412e-06, "loss": 1.1504, "step": 167 }, { "epoch": 0.005934063022928266, "grad_norm": 2.1822752952575684, "learning_rate": 1.976470588235294e-06, "loss": 1.1586, "step": 168 }, { "epoch": 0.005969384826636172, "grad_norm": 2.27101731300354, "learning_rate": 1.988235294117647e-06, "loss": 1.1601, "step": 169 }, { "epoch": 0.006004706630344078, "grad_norm": 2.1371796131134033, "learning_rate": 2.0000000000000003e-06, "loss": 1.152, "step": 170 }, { "epoch": 0.006040028434051985, "grad_norm": 2.3363194465637207, "learning_rate": 2.011764705882353e-06, "loss": 1.1606, "step": 171 }, { "epoch": 0.006075350237759891, "grad_norm": 2.1349406242370605, "learning_rate": 2.0235294117647063e-06, "loss": 1.1799, "step": 172 }, { "epoch": 0.006110672041467797, "grad_norm": 2.3249363899230957, "learning_rate": 2.035294117647059e-06, "loss": 1.1594, "step": 173 }, { "epoch": 0.0061459938451757035, "grad_norm": 2.1584038734436035, "learning_rate": 2.047058823529412e-06, "loss": 1.1387, "step": 174 }, { "epoch": 0.006181315648883611, "grad_norm": 2.5644092559814453, "learning_rate": 2.058823529411765e-06, "loss": 1.1643, "step": 175 }, { "epoch": 0.006216637452591517, "grad_norm": 2.4288971424102783, "learning_rate": 2.0705882352941177e-06, "loss": 1.1449, "step": 176 }, { "epoch": 0.006251959256299423, "grad_norm": 2.2012672424316406, "learning_rate": 2.0823529411764705e-06, "loss": 1.1499, "step": 177 }, { "epoch": 0.006287281060007329, "grad_norm": 2.1390492916107178, "learning_rate": 2.0941176470588236e-06, "loss": 1.1722, "step": 178 }, { "epoch": 0.006322602863715236, "grad_norm": 2.248410701751709, "learning_rate": 2.105882352941177e-06, "loss": 1.153, "step": 179 }, { "epoch": 0.006357924667423142, "grad_norm": 2.2758612632751465, "learning_rate": 2.1176470588235296e-06, "loss": 1.1622, "step": 180 }, { "epoch": 0.006393246471131048, "grad_norm": 2.144024610519409, "learning_rate": 2.1294117647058827e-06, "loss": 1.1576, "step": 181 }, { "epoch": 0.0064285682748389545, "grad_norm": 2.3045990467071533, "learning_rate": 2.1411764705882355e-06, "loss": 1.1454, "step": 182 }, { "epoch": 0.006463890078546861, "grad_norm": 2.1093788146972656, "learning_rate": 2.1529411764705887e-06, "loss": 1.1568, "step": 183 }, { "epoch": 0.006499211882254767, "grad_norm": 2.5025932788848877, "learning_rate": 2.1647058823529414e-06, "loss": 1.1565, "step": 184 }, { "epoch": 0.006534533685962673, "grad_norm": 2.3024392127990723, "learning_rate": 2.176470588235294e-06, "loss": 1.15, "step": 185 }, { "epoch": 0.0065698554896705804, "grad_norm": 2.2564828395843506, "learning_rate": 2.188235294117647e-06, "loss": 1.1511, "step": 186 }, { "epoch": 0.006605177293378487, "grad_norm": 2.253701686859131, "learning_rate": 2.2e-06, "loss": 1.1331, "step": 187 }, { "epoch": 0.006640499097086393, "grad_norm": 2.3943164348602295, "learning_rate": 2.2117647058823533e-06, "loss": 1.1446, "step": 188 }, { "epoch": 0.006675820900794299, "grad_norm": 2.1487646102905273, "learning_rate": 2.223529411764706e-06, "loss": 1.107, "step": 189 }, { "epoch": 0.0067111427045022055, "grad_norm": 2.2095444202423096, "learning_rate": 2.2352941176470592e-06, "loss": 1.1097, "step": 190 }, { "epoch": 0.006746464508210112, "grad_norm": 2.0986499786376953, "learning_rate": 2.247058823529412e-06, "loss": 1.1196, "step": 191 }, { "epoch": 0.006781786311918018, "grad_norm": 2.220679998397827, "learning_rate": 2.258823529411765e-06, "loss": 1.1602, "step": 192 }, { "epoch": 0.006817108115625924, "grad_norm": 2.3369388580322266, "learning_rate": 2.270588235294118e-06, "loss": 1.1852, "step": 193 }, { "epoch": 0.006852429919333831, "grad_norm": 2.434583902359009, "learning_rate": 2.2823529411764707e-06, "loss": 1.1634, "step": 194 }, { "epoch": 0.006887751723041737, "grad_norm": 2.1867456436157227, "learning_rate": 2.2941176470588234e-06, "loss": 1.1611, "step": 195 }, { "epoch": 0.006923073526749643, "grad_norm": 2.2617063522338867, "learning_rate": 2.3058823529411766e-06, "loss": 1.0973, "step": 196 }, { "epoch": 0.00695839533045755, "grad_norm": 2.2023425102233887, "learning_rate": 2.3176470588235293e-06, "loss": 1.1214, "step": 197 }, { "epoch": 0.0069937171341654565, "grad_norm": 2.2341506481170654, "learning_rate": 2.3294117647058825e-06, "loss": 1.1655, "step": 198 }, { "epoch": 0.007029038937873363, "grad_norm": 2.441977024078369, "learning_rate": 2.3411764705882357e-06, "loss": 1.1998, "step": 199 }, { "epoch": 0.007064360741581269, "grad_norm": 2.436368703842163, "learning_rate": 2.3529411764705885e-06, "loss": 1.1131, "step": 200 }, { "epoch": 0.007099682545289175, "grad_norm": 2.303463935852051, "learning_rate": 2.3647058823529416e-06, "loss": 1.1377, "step": 201 }, { "epoch": 0.007135004348997082, "grad_norm": 2.3388023376464844, "learning_rate": 2.3764705882352944e-06, "loss": 1.137, "step": 202 }, { "epoch": 0.007170326152704988, "grad_norm": 2.352865219116211, "learning_rate": 2.388235294117647e-06, "loss": 1.1557, "step": 203 }, { "epoch": 0.007205647956412894, "grad_norm": 2.218451976776123, "learning_rate": 2.4000000000000003e-06, "loss": 1.1278, "step": 204 }, { "epoch": 0.0072409697601208, "grad_norm": 2.286803960800171, "learning_rate": 2.411764705882353e-06, "loss": 1.1405, "step": 205 }, { "epoch": 0.007276291563828707, "grad_norm": 2.6477997303009033, "learning_rate": 2.423529411764706e-06, "loss": 1.1663, "step": 206 }, { "epoch": 0.007311613367536613, "grad_norm": 2.4374940395355225, "learning_rate": 2.435294117647059e-06, "loss": 1.1503, "step": 207 }, { "epoch": 0.007346935171244519, "grad_norm": 2.7090516090393066, "learning_rate": 2.447058823529412e-06, "loss": 1.1168, "step": 208 }, { "epoch": 0.007382256974952426, "grad_norm": 2.177992582321167, "learning_rate": 2.458823529411765e-06, "loss": 1.0955, "step": 209 }, { "epoch": 0.007417578778660333, "grad_norm": 2.5337178707122803, "learning_rate": 2.470588235294118e-06, "loss": 1.1997, "step": 210 }, { "epoch": 0.007452900582368239, "grad_norm": 2.1415274143218994, "learning_rate": 2.482352941176471e-06, "loss": 1.1388, "step": 211 }, { "epoch": 0.007488222386076145, "grad_norm": 2.7903926372528076, "learning_rate": 2.4941176470588236e-06, "loss": 1.135, "step": 212 }, { "epoch": 0.007523544189784051, "grad_norm": 2.4099743366241455, "learning_rate": 2.505882352941177e-06, "loss": 1.1338, "step": 213 }, { "epoch": 0.007558865993491958, "grad_norm": 2.270811080932617, "learning_rate": 2.5176470588235295e-06, "loss": 1.1229, "step": 214 }, { "epoch": 0.007594187797199864, "grad_norm": 2.49417781829834, "learning_rate": 2.5294117647058823e-06, "loss": 1.1411, "step": 215 }, { "epoch": 0.00762950960090777, "grad_norm": 2.462214231491089, "learning_rate": 2.541176470588235e-06, "loss": 1.1958, "step": 216 }, { "epoch": 0.0076648314046156765, "grad_norm": 2.273739814758301, "learning_rate": 2.5529411764705887e-06, "loss": 1.1574, "step": 217 }, { "epoch": 0.007700153208323583, "grad_norm": 2.3411974906921387, "learning_rate": 2.5647058823529414e-06, "loss": 1.1027, "step": 218 }, { "epoch": 0.007735475012031489, "grad_norm": 2.2769148349761963, "learning_rate": 2.576470588235294e-06, "loss": 1.1399, "step": 219 }, { "epoch": 0.007770796815739396, "grad_norm": 2.385493278503418, "learning_rate": 2.5882352941176473e-06, "loss": 1.0763, "step": 220 }, { "epoch": 0.007806118619447302, "grad_norm": 2.2707746028900146, "learning_rate": 2.6e-06, "loss": 1.1183, "step": 221 }, { "epoch": 0.007841440423155209, "grad_norm": 2.1989352703094482, "learning_rate": 2.6117647058823533e-06, "loss": 1.1043, "step": 222 }, { "epoch": 0.007876762226863114, "grad_norm": 2.665156364440918, "learning_rate": 2.623529411764706e-06, "loss": 1.0895, "step": 223 }, { "epoch": 0.007912084030571021, "grad_norm": 2.282320737838745, "learning_rate": 2.635294117647059e-06, "loss": 1.1551, "step": 224 }, { "epoch": 0.007947405834278927, "grad_norm": 2.415076494216919, "learning_rate": 2.647058823529412e-06, "loss": 1.1581, "step": 225 }, { "epoch": 0.007982727637986834, "grad_norm": 2.3109335899353027, "learning_rate": 2.6588235294117647e-06, "loss": 1.1257, "step": 226 }, { "epoch": 0.00801804944169474, "grad_norm": 2.368706226348877, "learning_rate": 2.6705882352941175e-06, "loss": 1.1542, "step": 227 }, { "epoch": 0.008053371245402646, "grad_norm": 2.2171123027801514, "learning_rate": 2.682352941176471e-06, "loss": 1.0981, "step": 228 }, { "epoch": 0.008088693049110553, "grad_norm": 2.442793846130371, "learning_rate": 2.694117647058824e-06, "loss": 1.1107, "step": 229 }, { "epoch": 0.008124014852818459, "grad_norm": 2.093168258666992, "learning_rate": 2.7058823529411766e-06, "loss": 1.1046, "step": 230 }, { "epoch": 0.008159336656526366, "grad_norm": 2.3587801456451416, "learning_rate": 2.7176470588235297e-06, "loss": 1.1084, "step": 231 }, { "epoch": 0.008194658460234271, "grad_norm": 2.2036142349243164, "learning_rate": 2.7294117647058825e-06, "loss": 1.0877, "step": 232 }, { "epoch": 0.008229980263942178, "grad_norm": 2.2866320610046387, "learning_rate": 2.7411764705882353e-06, "loss": 1.078, "step": 233 }, { "epoch": 0.008265302067650084, "grad_norm": 2.411362409591675, "learning_rate": 2.7529411764705884e-06, "loss": 1.1503, "step": 234 }, { "epoch": 0.008300623871357991, "grad_norm": 2.249601364135742, "learning_rate": 2.7647058823529416e-06, "loss": 1.1463, "step": 235 }, { "epoch": 0.008335945675065896, "grad_norm": 2.636225461959839, "learning_rate": 2.7764705882352944e-06, "loss": 1.1107, "step": 236 }, { "epoch": 0.008371267478773804, "grad_norm": 2.6178624629974365, "learning_rate": 2.788235294117647e-06, "loss": 1.1139, "step": 237 }, { "epoch": 0.00840658928248171, "grad_norm": 2.5231716632843018, "learning_rate": 2.8000000000000003e-06, "loss": 1.1049, "step": 238 }, { "epoch": 0.008441911086189616, "grad_norm": 2.413121461868286, "learning_rate": 2.8117647058823535e-06, "loss": 1.1099, "step": 239 }, { "epoch": 0.008477232889897523, "grad_norm": 2.402926206588745, "learning_rate": 2.8235294117647062e-06, "loss": 1.1378, "step": 240 }, { "epoch": 0.008512554693605429, "grad_norm": 2.2157437801361084, "learning_rate": 2.835294117647059e-06, "loss": 1.1186, "step": 241 }, { "epoch": 0.008547876497313336, "grad_norm": 2.2484657764434814, "learning_rate": 2.847058823529412e-06, "loss": 1.1296, "step": 242 }, { "epoch": 0.008583198301021241, "grad_norm": 2.2313332557678223, "learning_rate": 2.858823529411765e-06, "loss": 1.1126, "step": 243 }, { "epoch": 0.008618520104729148, "grad_norm": 2.6769893169403076, "learning_rate": 2.8705882352941177e-06, "loss": 1.129, "step": 244 }, { "epoch": 0.008653841908437054, "grad_norm": 2.2225122451782227, "learning_rate": 2.8823529411764704e-06, "loss": 1.099, "step": 245 }, { "epoch": 0.00868916371214496, "grad_norm": 2.1553547382354736, "learning_rate": 2.894117647058824e-06, "loss": 1.087, "step": 246 }, { "epoch": 0.008724485515852866, "grad_norm": 2.2423534393310547, "learning_rate": 2.9058823529411768e-06, "loss": 1.0891, "step": 247 }, { "epoch": 0.008759807319560773, "grad_norm": 2.2706284523010254, "learning_rate": 2.9176470588235295e-06, "loss": 1.1178, "step": 248 }, { "epoch": 0.00879512912326868, "grad_norm": 2.0747036933898926, "learning_rate": 2.9294117647058827e-06, "loss": 1.0604, "step": 249 }, { "epoch": 0.008830450926976586, "grad_norm": 2.270862340927124, "learning_rate": 2.9411764705882355e-06, "loss": 1.1129, "step": 250 }, { "epoch": 0.008865772730684493, "grad_norm": 2.2540347576141357, "learning_rate": 2.9529411764705882e-06, "loss": 1.1383, "step": 251 }, { "epoch": 0.008901094534392398, "grad_norm": 1.7107337713241577, "learning_rate": 2.9647058823529414e-06, "loss": 0.5852, "step": 252 }, { "epoch": 0.008936416338100306, "grad_norm": 2.462921142578125, "learning_rate": 2.9764705882352946e-06, "loss": 1.1034, "step": 253 }, { "epoch": 0.008971738141808211, "grad_norm": 2.488675832748413, "learning_rate": 2.9882352941176473e-06, "loss": 1.0999, "step": 254 }, { "epoch": 0.009007059945516118, "grad_norm": 2.270099401473999, "learning_rate": 3e-06, "loss": 1.0901, "step": 255 }, { "epoch": 0.009042381749224023, "grad_norm": 2.084080219268799, "learning_rate": 3.011764705882353e-06, "loss": 1.1088, "step": 256 }, { "epoch": 0.00907770355293193, "grad_norm": 2.468623161315918, "learning_rate": 3.0235294117647064e-06, "loss": 1.1232, "step": 257 }, { "epoch": 0.009113025356639836, "grad_norm": 1.6430821418762207, "learning_rate": 3.035294117647059e-06, "loss": 0.57, "step": 258 }, { "epoch": 0.009148347160347743, "grad_norm": 2.6149742603302, "learning_rate": 3.047058823529412e-06, "loss": 1.1088, "step": 259 }, { "epoch": 0.00918366896405565, "grad_norm": 2.323439836502075, "learning_rate": 3.058823529411765e-06, "loss": 1.1003, "step": 260 }, { "epoch": 0.009218990767763556, "grad_norm": 2.3921234607696533, "learning_rate": 3.070588235294118e-06, "loss": 1.1259, "step": 261 }, { "epoch": 0.009254312571471463, "grad_norm": 2.2501771450042725, "learning_rate": 3.0823529411764706e-06, "loss": 1.0883, "step": 262 }, { "epoch": 0.009289634375179368, "grad_norm": 2.3467068672180176, "learning_rate": 3.0941176470588234e-06, "loss": 1.0793, "step": 263 }, { "epoch": 0.009324956178887275, "grad_norm": 2.386831045150757, "learning_rate": 3.105882352941177e-06, "loss": 1.1097, "step": 264 }, { "epoch": 0.00936027798259518, "grad_norm": 2.3294565677642822, "learning_rate": 3.1176470588235297e-06, "loss": 1.1569, "step": 265 }, { "epoch": 0.009395599786303088, "grad_norm": 2.3478405475616455, "learning_rate": 3.1294117647058825e-06, "loss": 1.0903, "step": 266 }, { "epoch": 0.009430921590010993, "grad_norm": 2.3798093795776367, "learning_rate": 3.1411764705882357e-06, "loss": 1.1104, "step": 267 }, { "epoch": 0.0094662433937189, "grad_norm": 2.383192777633667, "learning_rate": 3.1529411764705884e-06, "loss": 1.0899, "step": 268 }, { "epoch": 0.009501565197426806, "grad_norm": 2.2417736053466797, "learning_rate": 3.1647058823529416e-06, "loss": 1.1197, "step": 269 }, { "epoch": 0.009536887001134713, "grad_norm": 2.1665518283843994, "learning_rate": 3.1764705882352943e-06, "loss": 1.1024, "step": 270 }, { "epoch": 0.00957220880484262, "grad_norm": 2.1018807888031006, "learning_rate": 3.1882352941176475e-06, "loss": 1.0834, "step": 271 }, { "epoch": 0.009607530608550525, "grad_norm": 2.2169220447540283, "learning_rate": 3.2000000000000003e-06, "loss": 1.1089, "step": 272 }, { "epoch": 0.009642852412258433, "grad_norm": 2.3937861919403076, "learning_rate": 3.211764705882353e-06, "loss": 1.1466, "step": 273 }, { "epoch": 0.009678174215966338, "grad_norm": 2.2921109199523926, "learning_rate": 3.2235294117647058e-06, "loss": 1.0764, "step": 274 }, { "epoch": 0.009713496019674245, "grad_norm": 2.2313711643218994, "learning_rate": 3.2352941176470594e-06, "loss": 1.0978, "step": 275 }, { "epoch": 0.00974881782338215, "grad_norm": 2.104316234588623, "learning_rate": 3.247058823529412e-06, "loss": 1.0975, "step": 276 }, { "epoch": 0.009784139627090058, "grad_norm": 2.333829641342163, "learning_rate": 3.258823529411765e-06, "loss": 1.0876, "step": 277 }, { "epoch": 0.009819461430797963, "grad_norm": 2.319845676422119, "learning_rate": 3.270588235294118e-06, "loss": 1.1029, "step": 278 }, { "epoch": 0.00985478323450587, "grad_norm": 2.0594887733459473, "learning_rate": 3.282352941176471e-06, "loss": 1.0549, "step": 279 }, { "epoch": 0.009890105038213776, "grad_norm": 2.3593103885650635, "learning_rate": 3.2941176470588236e-06, "loss": 1.1155, "step": 280 }, { "epoch": 0.009925426841921683, "grad_norm": 2.524333953857422, "learning_rate": 3.3058823529411763e-06, "loss": 1.0943, "step": 281 }, { "epoch": 0.00996074864562959, "grad_norm": 2.3754072189331055, "learning_rate": 3.31764705882353e-06, "loss": 1.0942, "step": 282 }, { "epoch": 0.009996070449337495, "grad_norm": 2.1459708213806152, "learning_rate": 3.3294117647058827e-06, "loss": 1.0858, "step": 283 }, { "epoch": 0.010031392253045402, "grad_norm": 2.4207093715667725, "learning_rate": 3.3411764705882354e-06, "loss": 1.0631, "step": 284 }, { "epoch": 0.010066714056753308, "grad_norm": 2.4578254222869873, "learning_rate": 3.352941176470588e-06, "loss": 1.1124, "step": 285 }, { "epoch": 0.010102035860461215, "grad_norm": 2.4316720962524414, "learning_rate": 3.364705882352942e-06, "loss": 1.1085, "step": 286 }, { "epoch": 0.01013735766416912, "grad_norm": 2.3542542457580566, "learning_rate": 3.3764705882352946e-06, "loss": 1.0976, "step": 287 }, { "epoch": 0.010172679467877027, "grad_norm": 2.2466461658477783, "learning_rate": 3.3882352941176473e-06, "loss": 1.0927, "step": 288 }, { "epoch": 0.010208001271584933, "grad_norm": 2.789839744567871, "learning_rate": 3.4000000000000005e-06, "loss": 1.1265, "step": 289 }, { "epoch": 0.01024332307529284, "grad_norm": 2.2236626148223877, "learning_rate": 3.4117647058823532e-06, "loss": 1.081, "step": 290 }, { "epoch": 0.010278644879000745, "grad_norm": 2.417717933654785, "learning_rate": 3.423529411764706e-06, "loss": 1.1394, "step": 291 }, { "epoch": 0.010313966682708653, "grad_norm": 2.2153854370117188, "learning_rate": 3.4352941176470587e-06, "loss": 1.0669, "step": 292 }, { "epoch": 0.01034928848641656, "grad_norm": 2.306919574737549, "learning_rate": 3.4470588235294123e-06, "loss": 1.0962, "step": 293 }, { "epoch": 0.010384610290124465, "grad_norm": 2.1533565521240234, "learning_rate": 3.458823529411765e-06, "loss": 1.0898, "step": 294 }, { "epoch": 0.010419932093832372, "grad_norm": 2.1670873165130615, "learning_rate": 3.470588235294118e-06, "loss": 1.0613, "step": 295 }, { "epoch": 0.010455253897540278, "grad_norm": 2.0257396697998047, "learning_rate": 3.4823529411764706e-06, "loss": 1.0819, "step": 296 }, { "epoch": 0.010490575701248185, "grad_norm": 2.1984951496124268, "learning_rate": 3.4941176470588238e-06, "loss": 1.0857, "step": 297 }, { "epoch": 0.01052589750495609, "grad_norm": 2.1790239810943604, "learning_rate": 3.5058823529411765e-06, "loss": 1.0795, "step": 298 }, { "epoch": 0.010561219308663997, "grad_norm": 2.1546454429626465, "learning_rate": 3.5176470588235297e-06, "loss": 1.0906, "step": 299 }, { "epoch": 0.010596541112371903, "grad_norm": 2.292004346847534, "learning_rate": 3.529411764705883e-06, "loss": 1.1027, "step": 300 }, { "epoch": 0.01063186291607981, "grad_norm": 2.1775670051574707, "learning_rate": 3.5411764705882356e-06, "loss": 1.0692, "step": 301 }, { "epoch": 0.010667184719787715, "grad_norm": 2.0815436840057373, "learning_rate": 3.5529411764705884e-06, "loss": 1.0815, "step": 302 }, { "epoch": 0.010702506523495622, "grad_norm": 2.2609121799468994, "learning_rate": 3.564705882352941e-06, "loss": 1.0566, "step": 303 }, { "epoch": 0.01073782832720353, "grad_norm": 2.1847357749938965, "learning_rate": 3.5764705882352948e-06, "loss": 1.0939, "step": 304 }, { "epoch": 0.010773150130911435, "grad_norm": 2.128725051879883, "learning_rate": 3.5882352941176475e-06, "loss": 1.0481, "step": 305 }, { "epoch": 0.010808471934619342, "grad_norm": 2.245635986328125, "learning_rate": 3.6000000000000003e-06, "loss": 1.1022, "step": 306 }, { "epoch": 0.010843793738327247, "grad_norm": 2.3876848220825195, "learning_rate": 3.6117647058823534e-06, "loss": 1.0417, "step": 307 }, { "epoch": 0.010879115542035155, "grad_norm": 2.4185197353363037, "learning_rate": 3.623529411764706e-06, "loss": 1.0633, "step": 308 }, { "epoch": 0.01091443734574306, "grad_norm": 2.2377851009368896, "learning_rate": 3.635294117647059e-06, "loss": 1.0736, "step": 309 }, { "epoch": 0.010949759149450967, "grad_norm": 2.051044464111328, "learning_rate": 3.6470588235294117e-06, "loss": 1.0895, "step": 310 }, { "epoch": 0.010985080953158873, "grad_norm": 2.275336265563965, "learning_rate": 3.6588235294117653e-06, "loss": 1.0891, "step": 311 }, { "epoch": 0.01102040275686678, "grad_norm": 2.362004518508911, "learning_rate": 3.670588235294118e-06, "loss": 1.0723, "step": 312 }, { "epoch": 0.011055724560574685, "grad_norm": 2.130342483520508, "learning_rate": 3.682352941176471e-06, "loss": 1.075, "step": 313 }, { "epoch": 0.011091046364282592, "grad_norm": 2.0637781620025635, "learning_rate": 3.6941176470588236e-06, "loss": 1.0726, "step": 314 }, { "epoch": 0.011126368167990498, "grad_norm": 2.2018284797668457, "learning_rate": 3.7058823529411767e-06, "loss": 1.0642, "step": 315 }, { "epoch": 0.011161689971698405, "grad_norm": 2.269298791885376, "learning_rate": 3.71764705882353e-06, "loss": 1.0901, "step": 316 }, { "epoch": 0.011197011775406312, "grad_norm": 1.5088388919830322, "learning_rate": 3.7294117647058827e-06, "loss": 0.5809, "step": 317 }, { "epoch": 0.011232333579114217, "grad_norm": 2.3513803482055664, "learning_rate": 3.741176470588236e-06, "loss": 1.0834, "step": 318 }, { "epoch": 0.011267655382822124, "grad_norm": 2.2825615406036377, "learning_rate": 3.7529411764705886e-06, "loss": 1.1156, "step": 319 }, { "epoch": 0.01130297718653003, "grad_norm": 2.2284250259399414, "learning_rate": 3.7647058823529414e-06, "loss": 1.0844, "step": 320 }, { "epoch": 0.011338298990237937, "grad_norm": 2.0995635986328125, "learning_rate": 3.776470588235294e-06, "loss": 1.0627, "step": 321 }, { "epoch": 0.011373620793945842, "grad_norm": 2.2014119625091553, "learning_rate": 3.7882352941176477e-06, "loss": 1.0864, "step": 322 }, { "epoch": 0.01140894259765375, "grad_norm": 2.1623353958129883, "learning_rate": 3.8000000000000005e-06, "loss": 1.0492, "step": 323 }, { "epoch": 0.011444264401361655, "grad_norm": 2.104987382888794, "learning_rate": 3.8117647058823532e-06, "loss": 1.0602, "step": 324 }, { "epoch": 0.011479586205069562, "grad_norm": 2.281287670135498, "learning_rate": 3.8235294117647055e-06, "loss": 1.0882, "step": 325 }, { "epoch": 0.011514908008777467, "grad_norm": 2.194974660873413, "learning_rate": 3.8352941176470596e-06, "loss": 1.0695, "step": 326 }, { "epoch": 0.011550229812485375, "grad_norm": 2.0958383083343506, "learning_rate": 3.847058823529412e-06, "loss": 1.0222, "step": 327 }, { "epoch": 0.011585551616193282, "grad_norm": 2.2701728343963623, "learning_rate": 3.858823529411765e-06, "loss": 1.0845, "step": 328 }, { "epoch": 0.011620873419901187, "grad_norm": 2.2406368255615234, "learning_rate": 3.870588235294118e-06, "loss": 1.0359, "step": 329 }, { "epoch": 0.011656195223609094, "grad_norm": 2.242861747741699, "learning_rate": 3.882352941176471e-06, "loss": 1.0694, "step": 330 }, { "epoch": 0.011691517027317, "grad_norm": 2.464031457901001, "learning_rate": 3.894117647058824e-06, "loss": 1.0843, "step": 331 }, { "epoch": 0.011726838831024907, "grad_norm": 2.2415003776550293, "learning_rate": 3.905882352941177e-06, "loss": 1.1042, "step": 332 }, { "epoch": 0.011762160634732812, "grad_norm": 2.469194173812866, "learning_rate": 3.91764705882353e-06, "loss": 1.0912, "step": 333 }, { "epoch": 0.01179748243844072, "grad_norm": 2.314094066619873, "learning_rate": 3.9294117647058824e-06, "loss": 1.0663, "step": 334 }, { "epoch": 0.011832804242148625, "grad_norm": 2.1162047386169434, "learning_rate": 3.941176470588236e-06, "loss": 1.1027, "step": 335 }, { "epoch": 0.011868126045856532, "grad_norm": 2.279606580734253, "learning_rate": 3.952941176470588e-06, "loss": 1.0464, "step": 336 }, { "epoch": 0.011903447849564437, "grad_norm": 1.3897268772125244, "learning_rate": 3.964705882352942e-06, "loss": 0.5648, "step": 337 }, { "epoch": 0.011938769653272344, "grad_norm": 2.2931416034698486, "learning_rate": 3.976470588235294e-06, "loss": 1.0748, "step": 338 }, { "epoch": 0.011974091456980251, "grad_norm": 2.2392728328704834, "learning_rate": 3.9882352941176475e-06, "loss": 1.0715, "step": 339 }, { "epoch": 0.012009413260688157, "grad_norm": 2.0910747051239014, "learning_rate": 4.000000000000001e-06, "loss": 1.0329, "step": 340 }, { "epoch": 0.012044735064396064, "grad_norm": 2.2423183917999268, "learning_rate": 4.011764705882353e-06, "loss": 1.0918, "step": 341 }, { "epoch": 0.01208005686810397, "grad_norm": 2.114332675933838, "learning_rate": 4.023529411764706e-06, "loss": 1.06, "step": 342 }, { "epoch": 0.012115378671811877, "grad_norm": 2.0313827991485596, "learning_rate": 4.0352941176470585e-06, "loss": 1.0825, "step": 343 }, { "epoch": 0.012150700475519782, "grad_norm": 2.5535361766815186, "learning_rate": 4.0470588235294125e-06, "loss": 1.0736, "step": 344 }, { "epoch": 0.012186022279227689, "grad_norm": 2.405487537384033, "learning_rate": 4.058823529411765e-06, "loss": 1.0613, "step": 345 }, { "epoch": 0.012221344082935594, "grad_norm": 2.12123703956604, "learning_rate": 4.070588235294118e-06, "loss": 1.0584, "step": 346 }, { "epoch": 0.012256665886643502, "grad_norm": 1.4056977033615112, "learning_rate": 4.082352941176471e-06, "loss": 0.5473, "step": 347 }, { "epoch": 0.012291987690351407, "grad_norm": 2.2217459678649902, "learning_rate": 4.094117647058824e-06, "loss": 1.0607, "step": 348 }, { "epoch": 0.012327309494059314, "grad_norm": 2.218441963195801, "learning_rate": 4.105882352941177e-06, "loss": 1.0007, "step": 349 }, { "epoch": 0.012362631297767221, "grad_norm": 2.1530778408050537, "learning_rate": 4.11764705882353e-06, "loss": 1.0505, "step": 350 }, { "epoch": 0.012397953101475127, "grad_norm": 2.2443604469299316, "learning_rate": 4.129411764705883e-06, "loss": 1.0781, "step": 351 }, { "epoch": 0.012433274905183034, "grad_norm": 2.4709954261779785, "learning_rate": 4.141176470588235e-06, "loss": 1.0754, "step": 352 }, { "epoch": 0.01246859670889094, "grad_norm": 2.266965389251709, "learning_rate": 4.152941176470589e-06, "loss": 1.0195, "step": 353 }, { "epoch": 0.012503918512598846, "grad_norm": 2.0727121829986572, "learning_rate": 4.164705882352941e-06, "loss": 1.0687, "step": 354 }, { "epoch": 0.012539240316306752, "grad_norm": 2.10981822013855, "learning_rate": 4.176470588235295e-06, "loss": 1.0541, "step": 355 }, { "epoch": 0.012574562120014659, "grad_norm": 2.1328365802764893, "learning_rate": 4.188235294117647e-06, "loss": 1.0738, "step": 356 }, { "epoch": 0.012609883923722564, "grad_norm": 2.3774054050445557, "learning_rate": 4.2000000000000004e-06, "loss": 1.0273, "step": 357 }, { "epoch": 0.012645205727430471, "grad_norm": 2.2351276874542236, "learning_rate": 4.211764705882354e-06, "loss": 1.0518, "step": 358 }, { "epoch": 0.012680527531138377, "grad_norm": 2.1297757625579834, "learning_rate": 4.223529411764706e-06, "loss": 1.0466, "step": 359 }, { "epoch": 0.012715849334846284, "grad_norm": 2.2754673957824707, "learning_rate": 4.235294117647059e-06, "loss": 1.0954, "step": 360 }, { "epoch": 0.012751171138554191, "grad_norm": 2.0657541751861572, "learning_rate": 4.247058823529412e-06, "loss": 1.033, "step": 361 }, { "epoch": 0.012786492942262096, "grad_norm": 2.2688252925872803, "learning_rate": 4.2588235294117655e-06, "loss": 1.067, "step": 362 }, { "epoch": 0.012821814745970004, "grad_norm": 2.1606409549713135, "learning_rate": 4.270588235294118e-06, "loss": 1.057, "step": 363 }, { "epoch": 0.012857136549677909, "grad_norm": 2.1862268447875977, "learning_rate": 4.282352941176471e-06, "loss": 1.027, "step": 364 }, { "epoch": 0.012892458353385816, "grad_norm": 2.092740297317505, "learning_rate": 4.294117647058823e-06, "loss": 1.0603, "step": 365 }, { "epoch": 0.012927780157093722, "grad_norm": 2.2306792736053467, "learning_rate": 4.305882352941177e-06, "loss": 1.0851, "step": 366 }, { "epoch": 0.012963101960801629, "grad_norm": 2.0028347969055176, "learning_rate": 4.31764705882353e-06, "loss": 1.0541, "step": 367 }, { "epoch": 0.012998423764509534, "grad_norm": 2.085022449493408, "learning_rate": 4.329411764705883e-06, "loss": 1.0368, "step": 368 }, { "epoch": 0.013033745568217441, "grad_norm": 2.1307613849639893, "learning_rate": 4.341176470588236e-06, "loss": 1.0741, "step": 369 }, { "epoch": 0.013069067371925347, "grad_norm": 2.1889872550964355, "learning_rate": 4.352941176470588e-06, "loss": 1.0975, "step": 370 }, { "epoch": 0.013104389175633254, "grad_norm": 2.2269067764282227, "learning_rate": 4.3647058823529415e-06, "loss": 1.1065, "step": 371 }, { "epoch": 0.013139710979341161, "grad_norm": 2.294867753982544, "learning_rate": 4.376470588235294e-06, "loss": 1.0581, "step": 372 }, { "epoch": 0.013175032783049066, "grad_norm": 2.0920000076293945, "learning_rate": 4.388235294117648e-06, "loss": 1.0519, "step": 373 }, { "epoch": 0.013210354586756973, "grad_norm": 2.1586577892303467, "learning_rate": 4.4e-06, "loss": 1.0659, "step": 374 }, { "epoch": 0.013245676390464879, "grad_norm": 2.204789161682129, "learning_rate": 4.411764705882353e-06, "loss": 1.0435, "step": 375 }, { "epoch": 0.013280998194172786, "grad_norm": 2.156325340270996, "learning_rate": 4.423529411764707e-06, "loss": 1.0412, "step": 376 }, { "epoch": 0.013316319997880691, "grad_norm": 2.107851028442383, "learning_rate": 4.435294117647059e-06, "loss": 1.0729, "step": 377 }, { "epoch": 0.013351641801588598, "grad_norm": 2.231900930404663, "learning_rate": 4.447058823529412e-06, "loss": 1.049, "step": 378 }, { "epoch": 0.013386963605296504, "grad_norm": 2.332439661026001, "learning_rate": 4.458823529411765e-06, "loss": 1.1075, "step": 379 }, { "epoch": 0.013422285409004411, "grad_norm": 2.4018101692199707, "learning_rate": 4.4705882352941184e-06, "loss": 1.0962, "step": 380 }, { "epoch": 0.013457607212712316, "grad_norm": 2.0478806495666504, "learning_rate": 4.482352941176471e-06, "loss": 1.0327, "step": 381 }, { "epoch": 0.013492929016420224, "grad_norm": 2.31107234954834, "learning_rate": 4.494117647058824e-06, "loss": 1.068, "step": 382 }, { "epoch": 0.01352825082012813, "grad_norm": 2.3049166202545166, "learning_rate": 4.505882352941176e-06, "loss": 1.0284, "step": 383 }, { "epoch": 0.013563572623836036, "grad_norm": 2.1293325424194336, "learning_rate": 4.51764705882353e-06, "loss": 1.0363, "step": 384 }, { "epoch": 0.013598894427543943, "grad_norm": 2.145634889602661, "learning_rate": 4.529411764705883e-06, "loss": 1.072, "step": 385 }, { "epoch": 0.013634216231251849, "grad_norm": 2.0927021503448486, "learning_rate": 4.541176470588236e-06, "loss": 1.0503, "step": 386 }, { "epoch": 0.013669538034959756, "grad_norm": 2.338296890258789, "learning_rate": 4.552941176470589e-06, "loss": 1.0628, "step": 387 }, { "epoch": 0.013704859838667661, "grad_norm": 2.1050541400909424, "learning_rate": 4.564705882352941e-06, "loss": 1.0321, "step": 388 }, { "epoch": 0.013740181642375568, "grad_norm": 2.345613479614258, "learning_rate": 4.5764705882352945e-06, "loss": 1.0382, "step": 389 }, { "epoch": 0.013775503446083474, "grad_norm": 2.143859624862671, "learning_rate": 4.588235294117647e-06, "loss": 1.0422, "step": 390 }, { "epoch": 0.01381082524979138, "grad_norm": 2.159528970718384, "learning_rate": 4.600000000000001e-06, "loss": 1.0085, "step": 391 }, { "epoch": 0.013846147053499286, "grad_norm": 2.0691022872924805, "learning_rate": 4.611764705882353e-06, "loss": 1.0684, "step": 392 }, { "epoch": 0.013881468857207193, "grad_norm": 2.205597162246704, "learning_rate": 4.623529411764706e-06, "loss": 1.0486, "step": 393 }, { "epoch": 0.0139167906609151, "grad_norm": 2.132272243499756, "learning_rate": 4.635294117647059e-06, "loss": 1.0495, "step": 394 }, { "epoch": 0.013952112464623006, "grad_norm": 1.3757280111312866, "learning_rate": 4.647058823529412e-06, "loss": 0.5611, "step": 395 }, { "epoch": 0.013987434268330913, "grad_norm": 2.4113986492156982, "learning_rate": 4.658823529411765e-06, "loss": 1.0702, "step": 396 }, { "epoch": 0.014022756072038818, "grad_norm": 2.0877671241760254, "learning_rate": 4.670588235294118e-06, "loss": 1.0641, "step": 397 }, { "epoch": 0.014058077875746726, "grad_norm": 2.2141287326812744, "learning_rate": 4.682352941176471e-06, "loss": 1.0745, "step": 398 }, { "epoch": 0.014093399679454631, "grad_norm": 2.056694269180298, "learning_rate": 4.694117647058824e-06, "loss": 1.0276, "step": 399 }, { "epoch": 0.014128721483162538, "grad_norm": 2.1312453746795654, "learning_rate": 4.705882352941177e-06, "loss": 1.0548, "step": 400 }, { "epoch": 0.014164043286870443, "grad_norm": 2.1212353706359863, "learning_rate": 4.717647058823529e-06, "loss": 1.0123, "step": 401 }, { "epoch": 0.01419936509057835, "grad_norm": 1.999153733253479, "learning_rate": 4.729411764705883e-06, "loss": 1.0361, "step": 402 }, { "epoch": 0.014234686894286256, "grad_norm": 2.196241617202759, "learning_rate": 4.741176470588236e-06, "loss": 1.0586, "step": 403 }, { "epoch": 0.014270008697994163, "grad_norm": 2.228975296020508, "learning_rate": 4.752941176470589e-06, "loss": 1.0389, "step": 404 }, { "epoch": 0.014305330501702069, "grad_norm": 2.108748435974121, "learning_rate": 4.764705882352941e-06, "loss": 1.0268, "step": 405 }, { "epoch": 0.014340652305409976, "grad_norm": 2.2382216453552246, "learning_rate": 4.776470588235294e-06, "loss": 1.0285, "step": 406 }, { "epoch": 0.014375974109117883, "grad_norm": 2.3053107261657715, "learning_rate": 4.7882352941176475e-06, "loss": 1.0651, "step": 407 }, { "epoch": 0.014411295912825788, "grad_norm": 2.1680376529693604, "learning_rate": 4.800000000000001e-06, "loss": 1.03, "step": 408 }, { "epoch": 0.014446617716533695, "grad_norm": 2.178997755050659, "learning_rate": 4.811764705882354e-06, "loss": 1.0362, "step": 409 }, { "epoch": 0.0144819395202416, "grad_norm": 2.46354079246521, "learning_rate": 4.823529411764706e-06, "loss": 1.0653, "step": 410 }, { "epoch": 0.014517261323949508, "grad_norm": 2.221879005432129, "learning_rate": 4.835294117647059e-06, "loss": 1.0296, "step": 411 }, { "epoch": 0.014552583127657413, "grad_norm": 2.1831765174865723, "learning_rate": 4.847058823529412e-06, "loss": 1.0351, "step": 412 }, { "epoch": 0.01458790493136532, "grad_norm": 2.0757429599761963, "learning_rate": 4.858823529411766e-06, "loss": 1.0502, "step": 413 }, { "epoch": 0.014623226735073226, "grad_norm": 2.082994222640991, "learning_rate": 4.870588235294118e-06, "loss": 1.0169, "step": 414 }, { "epoch": 0.014658548538781133, "grad_norm": 2.2109363079071045, "learning_rate": 4.882352941176471e-06, "loss": 1.0491, "step": 415 }, { "epoch": 0.014693870342489038, "grad_norm": 2.337096691131592, "learning_rate": 4.894117647058824e-06, "loss": 1.0325, "step": 416 }, { "epoch": 0.014729192146196946, "grad_norm": 2.6265218257904053, "learning_rate": 4.905882352941177e-06, "loss": 1.0747, "step": 417 }, { "epoch": 0.014764513949904853, "grad_norm": 2.1464269161224365, "learning_rate": 4.91764705882353e-06, "loss": 1.0926, "step": 418 }, { "epoch": 0.014799835753612758, "grad_norm": 2.268871307373047, "learning_rate": 4.929411764705882e-06, "loss": 1.0847, "step": 419 }, { "epoch": 0.014835157557320665, "grad_norm": 2.1036980152130127, "learning_rate": 4.941176470588236e-06, "loss": 1.0583, "step": 420 }, { "epoch": 0.01487047936102857, "grad_norm": 2.5591142177581787, "learning_rate": 4.9529411764705885e-06, "loss": 1.0435, "step": 421 }, { "epoch": 0.014905801164736478, "grad_norm": 1.977989912033081, "learning_rate": 4.964705882352942e-06, "loss": 1.0434, "step": 422 }, { "epoch": 0.014941122968444383, "grad_norm": 2.370812177658081, "learning_rate": 4.976470588235294e-06, "loss": 1.0819, "step": 423 }, { "epoch": 0.01497644477215229, "grad_norm": 2.201378107070923, "learning_rate": 4.988235294117647e-06, "loss": 1.0224, "step": 424 }, { "epoch": 0.015011766575860196, "grad_norm": 2.1455070972442627, "learning_rate": 5e-06, "loss": 1.0368, "step": 425 }, { "epoch": 0.015047088379568103, "grad_norm": 2.176729917526245, "learning_rate": 5.011764705882354e-06, "loss": 1.0453, "step": 426 }, { "epoch": 0.015082410183276008, "grad_norm": 2.27405047416687, "learning_rate": 5.023529411764706e-06, "loss": 1.0531, "step": 427 }, { "epoch": 0.015117731986983915, "grad_norm": 2.4808363914489746, "learning_rate": 5.035294117647059e-06, "loss": 1.0497, "step": 428 }, { "epoch": 0.015153053790691822, "grad_norm": 2.2510664463043213, "learning_rate": 5.047058823529412e-06, "loss": 1.0414, "step": 429 }, { "epoch": 0.015188375594399728, "grad_norm": 2.1950273513793945, "learning_rate": 5.058823529411765e-06, "loss": 1.0736, "step": 430 }, { "epoch": 0.015223697398107635, "grad_norm": 2.1781342029571533, "learning_rate": 5.070588235294119e-06, "loss": 1.0355, "step": 431 }, { "epoch": 0.01525901920181554, "grad_norm": 2.0454907417297363, "learning_rate": 5.08235294117647e-06, "loss": 1.0279, "step": 432 }, { "epoch": 0.015294341005523448, "grad_norm": 2.855381488800049, "learning_rate": 5.094117647058824e-06, "loss": 1.0506, "step": 433 }, { "epoch": 0.015329662809231353, "grad_norm": 2.213148355484009, "learning_rate": 5.105882352941177e-06, "loss": 1.0569, "step": 434 }, { "epoch": 0.01536498461293926, "grad_norm": 2.1584696769714355, "learning_rate": 5.11764705882353e-06, "loss": 1.0684, "step": 435 }, { "epoch": 0.015400306416647165, "grad_norm": 2.1869168281555176, "learning_rate": 5.129411764705883e-06, "loss": 1.074, "step": 436 }, { "epoch": 0.015435628220355073, "grad_norm": 2.3133585453033447, "learning_rate": 5.141176470588236e-06, "loss": 1.021, "step": 437 }, { "epoch": 0.015470950024062978, "grad_norm": 2.294043779373169, "learning_rate": 5.152941176470588e-06, "loss": 1.0653, "step": 438 }, { "epoch": 0.015506271827770885, "grad_norm": 2.244575262069702, "learning_rate": 5.1647058823529415e-06, "loss": 1.0744, "step": 439 }, { "epoch": 0.015541593631478792, "grad_norm": 2.16489315032959, "learning_rate": 5.176470588235295e-06, "loss": 1.0667, "step": 440 }, { "epoch": 0.015576915435186698, "grad_norm": 2.092787742614746, "learning_rate": 5.188235294117647e-06, "loss": 1.0129, "step": 441 }, { "epoch": 0.015612237238894605, "grad_norm": 2.3100364208221436, "learning_rate": 5.2e-06, "loss": 1.0803, "step": 442 }, { "epoch": 0.01564755904260251, "grad_norm": 2.1919615268707275, "learning_rate": 5.2117647058823525e-06, "loss": 1.0563, "step": 443 }, { "epoch": 0.015682880846310417, "grad_norm": 2.1023223400115967, "learning_rate": 5.2235294117647065e-06, "loss": 1.0609, "step": 444 }, { "epoch": 0.015718202650018324, "grad_norm": 1.7141788005828857, "learning_rate": 5.23529411764706e-06, "loss": 0.57, "step": 445 }, { "epoch": 0.015753524453726228, "grad_norm": 2.5133748054504395, "learning_rate": 5.247058823529412e-06, "loss": 1.0626, "step": 446 }, { "epoch": 0.015788846257434135, "grad_norm": 2.5930285453796387, "learning_rate": 5.258823529411765e-06, "loss": 1.0646, "step": 447 }, { "epoch": 0.015824168061142042, "grad_norm": 2.340937376022339, "learning_rate": 5.270588235294118e-06, "loss": 1.0304, "step": 448 }, { "epoch": 0.01585948986484995, "grad_norm": 2.2440717220306396, "learning_rate": 5.282352941176471e-06, "loss": 1.0536, "step": 449 }, { "epoch": 0.015894811668557853, "grad_norm": 2.5076591968536377, "learning_rate": 5.294117647058824e-06, "loss": 1.0427, "step": 450 }, { "epoch": 0.01593013347226576, "grad_norm": 2.198049306869507, "learning_rate": 5.305882352941177e-06, "loss": 1.0424, "step": 451 }, { "epoch": 0.015965455275973667, "grad_norm": 2.3152127265930176, "learning_rate": 5.317647058823529e-06, "loss": 1.0332, "step": 452 }, { "epoch": 0.016000777079681575, "grad_norm": 2.2705001831054688, "learning_rate": 5.329411764705883e-06, "loss": 1.072, "step": 453 }, { "epoch": 0.01603609888338948, "grad_norm": 2.1002748012542725, "learning_rate": 5.341176470588235e-06, "loss": 1.0506, "step": 454 }, { "epoch": 0.016071420687097385, "grad_norm": 2.2497711181640625, "learning_rate": 5.352941176470589e-06, "loss": 1.0727, "step": 455 }, { "epoch": 0.016106742490805293, "grad_norm": 2.2618160247802734, "learning_rate": 5.364705882352942e-06, "loss": 1.0537, "step": 456 }, { "epoch": 0.0161420642945132, "grad_norm": 2.4222958087921143, "learning_rate": 5.3764705882352945e-06, "loss": 1.0536, "step": 457 }, { "epoch": 0.016177386098221107, "grad_norm": 2.1262552738189697, "learning_rate": 5.388235294117648e-06, "loss": 1.0112, "step": 458 }, { "epoch": 0.01621270790192901, "grad_norm": 2.0724430084228516, "learning_rate": 5.400000000000001e-06, "loss": 0.9894, "step": 459 }, { "epoch": 0.016248029705636918, "grad_norm": 2.1216604709625244, "learning_rate": 5.411764705882353e-06, "loss": 1.0149, "step": 460 }, { "epoch": 0.016283351509344825, "grad_norm": 2.1934258937835693, "learning_rate": 5.423529411764706e-06, "loss": 1.0403, "step": 461 }, { "epoch": 0.016318673313052732, "grad_norm": 2.0777227878570557, "learning_rate": 5.4352941176470595e-06, "loss": 1.0319, "step": 462 }, { "epoch": 0.01635399511676064, "grad_norm": 2.1888911724090576, "learning_rate": 5.447058823529412e-06, "loss": 1.0003, "step": 463 }, { "epoch": 0.016389316920468543, "grad_norm": 2.156463384628296, "learning_rate": 5.458823529411765e-06, "loss": 0.9971, "step": 464 }, { "epoch": 0.01642463872417645, "grad_norm": 2.182448625564575, "learning_rate": 5.470588235294119e-06, "loss": 1.0622, "step": 465 }, { "epoch": 0.016459960527884357, "grad_norm": 2.5405173301696777, "learning_rate": 5.4823529411764705e-06, "loss": 1.0208, "step": 466 }, { "epoch": 0.016495282331592264, "grad_norm": 3.4104673862457275, "learning_rate": 5.4941176470588245e-06, "loss": 1.0738, "step": 467 }, { "epoch": 0.016530604135300168, "grad_norm": 2.140434980392456, "learning_rate": 5.505882352941177e-06, "loss": 1.0744, "step": 468 }, { "epoch": 0.016565925939008075, "grad_norm": 2.415468454360962, "learning_rate": 5.51764705882353e-06, "loss": 1.0401, "step": 469 }, { "epoch": 0.016601247742715982, "grad_norm": 2.1620333194732666, "learning_rate": 5.529411764705883e-06, "loss": 0.9932, "step": 470 }, { "epoch": 0.01663656954642389, "grad_norm": 2.4131669998168945, "learning_rate": 5.5411764705882356e-06, "loss": 1.0295, "step": 471 }, { "epoch": 0.016671891350131793, "grad_norm": 2.2203421592712402, "learning_rate": 5.552941176470589e-06, "loss": 1.0627, "step": 472 }, { "epoch": 0.0167072131538397, "grad_norm": 2.0976526737213135, "learning_rate": 5.564705882352942e-06, "loss": 1.0038, "step": 473 }, { "epoch": 0.016742534957547607, "grad_norm": 2.110264539718628, "learning_rate": 5.576470588235294e-06, "loss": 1.0312, "step": 474 }, { "epoch": 0.016777856761255514, "grad_norm": 2.1976101398468018, "learning_rate": 5.588235294117647e-06, "loss": 1.0804, "step": 475 }, { "epoch": 0.01681317856496342, "grad_norm": 2.1322169303894043, "learning_rate": 5.600000000000001e-06, "loss": 1.0267, "step": 476 }, { "epoch": 0.016848500368671325, "grad_norm": 2.293363094329834, "learning_rate": 5.611764705882353e-06, "loss": 1.0435, "step": 477 }, { "epoch": 0.016883822172379232, "grad_norm": 2.1159355640411377, "learning_rate": 5.623529411764707e-06, "loss": 1.035, "step": 478 }, { "epoch": 0.01691914397608714, "grad_norm": 2.103666305541992, "learning_rate": 5.6352941176470584e-06, "loss": 0.9767, "step": 479 }, { "epoch": 0.016954465779795046, "grad_norm": 2.0624775886535645, "learning_rate": 5.6470588235294125e-06, "loss": 1.0241, "step": 480 }, { "epoch": 0.01698978758350295, "grad_norm": 2.3333144187927246, "learning_rate": 5.658823529411766e-06, "loss": 1.045, "step": 481 }, { "epoch": 0.017025109387210857, "grad_norm": 2.2154479026794434, "learning_rate": 5.670588235294118e-06, "loss": 1.0401, "step": 482 }, { "epoch": 0.017060431190918764, "grad_norm": 2.2446274757385254, "learning_rate": 5.682352941176471e-06, "loss": 1.0031, "step": 483 }, { "epoch": 0.01709575299462667, "grad_norm": 2.255295991897583, "learning_rate": 5.694117647058824e-06, "loss": 1.0077, "step": 484 }, { "epoch": 0.01713107479833458, "grad_norm": 2.0901665687561035, "learning_rate": 5.705882352941177e-06, "loss": 1.0009, "step": 485 }, { "epoch": 0.017166396602042482, "grad_norm": 2.2607898712158203, "learning_rate": 5.71764705882353e-06, "loss": 1.0329, "step": 486 }, { "epoch": 0.01720171840575039, "grad_norm": 2.422051191329956, "learning_rate": 5.729411764705883e-06, "loss": 1.0263, "step": 487 }, { "epoch": 0.017237040209458297, "grad_norm": 2.018781900405884, "learning_rate": 5.741176470588235e-06, "loss": 1.0315, "step": 488 }, { "epoch": 0.017272362013166204, "grad_norm": 2.0793721675872803, "learning_rate": 5.7529411764705885e-06, "loss": 1.0339, "step": 489 }, { "epoch": 0.017307683816874107, "grad_norm": 1.9729067087173462, "learning_rate": 5.764705882352941e-06, "loss": 1.0302, "step": 490 }, { "epoch": 0.017343005620582014, "grad_norm": 2.0371861457824707, "learning_rate": 5.776470588235295e-06, "loss": 1.0448, "step": 491 }, { "epoch": 0.01737832742428992, "grad_norm": 2.691761016845703, "learning_rate": 5.788235294117648e-06, "loss": 1.0755, "step": 492 }, { "epoch": 0.01741364922799783, "grad_norm": 2.03070330619812, "learning_rate": 5.8e-06, "loss": 1.0341, "step": 493 }, { "epoch": 0.017448971031705732, "grad_norm": 2.2321417331695557, "learning_rate": 5.8117647058823536e-06, "loss": 0.9975, "step": 494 }, { "epoch": 0.01748429283541364, "grad_norm": 2.0017569065093994, "learning_rate": 5.823529411764707e-06, "loss": 0.9939, "step": 495 }, { "epoch": 0.017519614639121547, "grad_norm": 2.1238951683044434, "learning_rate": 5.835294117647059e-06, "loss": 1.0182, "step": 496 }, { "epoch": 0.017554936442829454, "grad_norm": 2.183018684387207, "learning_rate": 5.847058823529412e-06, "loss": 1.0041, "step": 497 }, { "epoch": 0.01759025824653736, "grad_norm": 2.4114320278167725, "learning_rate": 5.858823529411765e-06, "loss": 1.022, "step": 498 }, { "epoch": 0.017625580050245265, "grad_norm": 2.185688018798828, "learning_rate": 5.870588235294118e-06, "loss": 1.0682, "step": 499 }, { "epoch": 0.017660901853953172, "grad_norm": 2.2464067935943604, "learning_rate": 5.882352941176471e-06, "loss": 1.0418, "step": 500 }, { "epoch": 0.01769622365766108, "grad_norm": 2.1693520545959473, "learning_rate": 5.894117647058823e-06, "loss": 0.9845, "step": 501 }, { "epoch": 0.017731545461368986, "grad_norm": 2.317948579788208, "learning_rate": 5.9058823529411764e-06, "loss": 1.0148, "step": 502 }, { "epoch": 0.01776686726507689, "grad_norm": 2.369387149810791, "learning_rate": 5.9176470588235305e-06, "loss": 1.0751, "step": 503 }, { "epoch": 0.017802189068784797, "grad_norm": 2.050786256790161, "learning_rate": 5.929411764705883e-06, "loss": 1.0106, "step": 504 }, { "epoch": 0.017837510872492704, "grad_norm": 2.230949640274048, "learning_rate": 5.941176470588236e-06, "loss": 1.0177, "step": 505 }, { "epoch": 0.01787283267620061, "grad_norm": 2.312265634536743, "learning_rate": 5.952941176470589e-06, "loss": 1.0255, "step": 506 }, { "epoch": 0.017908154479908518, "grad_norm": 2.1273233890533447, "learning_rate": 5.9647058823529415e-06, "loss": 1.0023, "step": 507 }, { "epoch": 0.017943476283616422, "grad_norm": 2.263514757156372, "learning_rate": 5.976470588235295e-06, "loss": 1.0297, "step": 508 }, { "epoch": 0.01797879808732433, "grad_norm": 2.198049783706665, "learning_rate": 5.988235294117648e-06, "loss": 1.0171, "step": 509 }, { "epoch": 0.018014119891032236, "grad_norm": 2.015594482421875, "learning_rate": 6e-06, "loss": 1.021, "step": 510 }, { "epoch": 0.018049441694740143, "grad_norm": 2.0430421829223633, "learning_rate": 6.011764705882353e-06, "loss": 1.0318, "step": 511 }, { "epoch": 0.018084763498448047, "grad_norm": 2.0895683765411377, "learning_rate": 6.023529411764706e-06, "loss": 0.9948, "step": 512 }, { "epoch": 0.018120085302155954, "grad_norm": 2.091069221496582, "learning_rate": 6.035294117647059e-06, "loss": 0.9972, "step": 513 }, { "epoch": 0.01815540710586386, "grad_norm": 2.164262294769287, "learning_rate": 6.047058823529413e-06, "loss": 1.0472, "step": 514 }, { "epoch": 0.01819072890957177, "grad_norm": 2.0872700214385986, "learning_rate": 6.058823529411765e-06, "loss": 0.9969, "step": 515 }, { "epoch": 0.018226050713279672, "grad_norm": 2.065896987915039, "learning_rate": 6.070588235294118e-06, "loss": 1.0216, "step": 516 }, { "epoch": 0.01826137251698758, "grad_norm": 2.252789258956909, "learning_rate": 6.0823529411764715e-06, "loss": 1.053, "step": 517 }, { "epoch": 0.018296694320695486, "grad_norm": 2.3154289722442627, "learning_rate": 6.094117647058824e-06, "loss": 1.0293, "step": 518 }, { "epoch": 0.018332016124403393, "grad_norm": 2.176436185836792, "learning_rate": 6.105882352941177e-06, "loss": 1.0267, "step": 519 }, { "epoch": 0.0183673379281113, "grad_norm": 2.1680850982666016, "learning_rate": 6.11764705882353e-06, "loss": 1.014, "step": 520 }, { "epoch": 0.018402659731819204, "grad_norm": 2.066922664642334, "learning_rate": 6.1294117647058826e-06, "loss": 1.0332, "step": 521 }, { "epoch": 0.01843798153552711, "grad_norm": 2.6691155433654785, "learning_rate": 6.141176470588236e-06, "loss": 1.0563, "step": 522 }, { "epoch": 0.01847330333923502, "grad_norm": 2.1373555660247803, "learning_rate": 6.152941176470588e-06, "loss": 0.9867, "step": 523 }, { "epoch": 0.018508625142942926, "grad_norm": 2.19852352142334, "learning_rate": 6.164705882352941e-06, "loss": 1.0239, "step": 524 }, { "epoch": 0.01854394694665083, "grad_norm": 2.189573049545288, "learning_rate": 6.176470588235295e-06, "loss": 1.0071, "step": 525 }, { "epoch": 0.018579268750358736, "grad_norm": 2.2143843173980713, "learning_rate": 6.188235294117647e-06, "loss": 1.0215, "step": 526 }, { "epoch": 0.018614590554066644, "grad_norm": 2.1640849113464355, "learning_rate": 6.200000000000001e-06, "loss": 1.0253, "step": 527 }, { "epoch": 0.01864991235777455, "grad_norm": 2.1837713718414307, "learning_rate": 6.211764705882354e-06, "loss": 1.0127, "step": 528 }, { "epoch": 0.018685234161482458, "grad_norm": 2.0127527713775635, "learning_rate": 6.223529411764706e-06, "loss": 1.0054, "step": 529 }, { "epoch": 0.01872055596519036, "grad_norm": 2.476789712905884, "learning_rate": 6.2352941176470595e-06, "loss": 0.9969, "step": 530 }, { "epoch": 0.01875587776889827, "grad_norm": 2.220402717590332, "learning_rate": 6.247058823529413e-06, "loss": 1.0151, "step": 531 }, { "epoch": 0.018791199572606176, "grad_norm": 2.144726276397705, "learning_rate": 6.258823529411765e-06, "loss": 1.0353, "step": 532 }, { "epoch": 0.018826521376314083, "grad_norm": 2.066599130630493, "learning_rate": 6.270588235294118e-06, "loss": 1.0335, "step": 533 }, { "epoch": 0.018861843180021987, "grad_norm": 2.274198055267334, "learning_rate": 6.282352941176471e-06, "loss": 1.0063, "step": 534 }, { "epoch": 0.018897164983729894, "grad_norm": 2.0381784439086914, "learning_rate": 6.294117647058824e-06, "loss": 1.0046, "step": 535 }, { "epoch": 0.0189324867874378, "grad_norm": 2.193995475769043, "learning_rate": 6.305882352941177e-06, "loss": 1.0541, "step": 536 }, { "epoch": 0.018967808591145708, "grad_norm": 2.1314337253570557, "learning_rate": 6.317647058823529e-06, "loss": 1.0176, "step": 537 }, { "epoch": 0.01900313039485361, "grad_norm": 2.1783955097198486, "learning_rate": 6.329411764705883e-06, "loss": 1.0557, "step": 538 }, { "epoch": 0.01903845219856152, "grad_norm": 2.2050623893737793, "learning_rate": 6.341176470588236e-06, "loss": 1.0376, "step": 539 }, { "epoch": 0.019073774002269426, "grad_norm": 2.108335494995117, "learning_rate": 6.352941176470589e-06, "loss": 1.0234, "step": 540 }, { "epoch": 0.019109095805977333, "grad_norm": 2.120414972305298, "learning_rate": 6.364705882352942e-06, "loss": 1.063, "step": 541 }, { "epoch": 0.01914441760968524, "grad_norm": 2.07645583152771, "learning_rate": 6.376470588235295e-06, "loss": 1.0069, "step": 542 }, { "epoch": 0.019179739413393144, "grad_norm": 2.192124128341675, "learning_rate": 6.388235294117647e-06, "loss": 1.0492, "step": 543 }, { "epoch": 0.01921506121710105, "grad_norm": 2.2553675174713135, "learning_rate": 6.4000000000000006e-06, "loss": 1.043, "step": 544 }, { "epoch": 0.019250383020808958, "grad_norm": 1.9184147119522095, "learning_rate": 6.411764705882354e-06, "loss": 1.0182, "step": 545 }, { "epoch": 0.019285704824516865, "grad_norm": 2.064039945602417, "learning_rate": 6.423529411764706e-06, "loss": 0.9938, "step": 546 }, { "epoch": 0.01932102662822477, "grad_norm": 2.012089252471924, "learning_rate": 6.435294117647059e-06, "loss": 1.0238, "step": 547 }, { "epoch": 0.019356348431932676, "grad_norm": 2.0161242485046387, "learning_rate": 6.4470588235294116e-06, "loss": 1.0315, "step": 548 }, { "epoch": 0.019391670235640583, "grad_norm": 2.2374298572540283, "learning_rate": 6.458823529411765e-06, "loss": 1.0449, "step": 549 }, { "epoch": 0.01942699203934849, "grad_norm": 2.1275839805603027, "learning_rate": 6.470588235294119e-06, "loss": 1.0294, "step": 550 }, { "epoch": 0.019462313843056394, "grad_norm": 2.0263943672180176, "learning_rate": 6.482352941176471e-06, "loss": 1.001, "step": 551 }, { "epoch": 0.0194976356467643, "grad_norm": 2.0503122806549072, "learning_rate": 6.494117647058824e-06, "loss": 1.0246, "step": 552 }, { "epoch": 0.019532957450472208, "grad_norm": 2.0545966625213623, "learning_rate": 6.5058823529411775e-06, "loss": 0.9888, "step": 553 }, { "epoch": 0.019568279254180115, "grad_norm": 2.281379461288452, "learning_rate": 6.51764705882353e-06, "loss": 1.0551, "step": 554 }, { "epoch": 0.019603601057888023, "grad_norm": 1.984341025352478, "learning_rate": 6.529411764705883e-06, "loss": 1.0155, "step": 555 }, { "epoch": 0.019638922861595926, "grad_norm": 2.079293966293335, "learning_rate": 6.541176470588236e-06, "loss": 1.0055, "step": 556 }, { "epoch": 0.019674244665303833, "grad_norm": 2.1063926219940186, "learning_rate": 6.5529411764705885e-06, "loss": 1.0244, "step": 557 }, { "epoch": 0.01970956646901174, "grad_norm": 2.011392116546631, "learning_rate": 6.564705882352942e-06, "loss": 1.0072, "step": 558 }, { "epoch": 0.019744888272719648, "grad_norm": 1.7282251119613647, "learning_rate": 6.576470588235294e-06, "loss": 0.5762, "step": 559 }, { "epoch": 0.01978021007642755, "grad_norm": 2.154761552810669, "learning_rate": 6.588235294117647e-06, "loss": 1.0343, "step": 560 }, { "epoch": 0.01981553188013546, "grad_norm": 2.3325726985931396, "learning_rate": 6.600000000000001e-06, "loss": 1.0282, "step": 561 }, { "epoch": 0.019850853683843366, "grad_norm": 2.0916285514831543, "learning_rate": 6.611764705882353e-06, "loss": 1.0399, "step": 562 }, { "epoch": 0.019886175487551273, "grad_norm": 2.044177770614624, "learning_rate": 6.623529411764707e-06, "loss": 1.044, "step": 563 }, { "epoch": 0.01992149729125918, "grad_norm": 2.042271137237549, "learning_rate": 6.63529411764706e-06, "loss": 1.0404, "step": 564 }, { "epoch": 0.019956819094967083, "grad_norm": 2.357544183731079, "learning_rate": 6.647058823529412e-06, "loss": 1.0571, "step": 565 }, { "epoch": 0.01999214089867499, "grad_norm": 2.242161512374878, "learning_rate": 6.658823529411765e-06, "loss": 0.9875, "step": 566 }, { "epoch": 0.020027462702382898, "grad_norm": 2.1038200855255127, "learning_rate": 6.6705882352941186e-06, "loss": 1.0374, "step": 567 }, { "epoch": 0.020062784506090805, "grad_norm": 2.007030487060547, "learning_rate": 6.682352941176471e-06, "loss": 1.0272, "step": 568 }, { "epoch": 0.02009810630979871, "grad_norm": 2.166874885559082, "learning_rate": 6.694117647058824e-06, "loss": 1.0173, "step": 569 }, { "epoch": 0.020133428113506616, "grad_norm": 2.212344169616699, "learning_rate": 6.705882352941176e-06, "loss": 0.9643, "step": 570 }, { "epoch": 0.020168749917214523, "grad_norm": 2.131882905960083, "learning_rate": 6.7176470588235296e-06, "loss": 1.027, "step": 571 }, { "epoch": 0.02020407172092243, "grad_norm": 2.1393659114837646, "learning_rate": 6.729411764705884e-06, "loss": 1.0275, "step": 572 }, { "epoch": 0.020239393524630334, "grad_norm": 2.4087207317352295, "learning_rate": 6.741176470588235e-06, "loss": 0.9787, "step": 573 }, { "epoch": 0.02027471532833824, "grad_norm": 2.0898396968841553, "learning_rate": 6.752941176470589e-06, "loss": 1.0337, "step": 574 }, { "epoch": 0.020310037132046148, "grad_norm": 2.1865146160125732, "learning_rate": 6.764705882352942e-06, "loss": 1.0472, "step": 575 }, { "epoch": 0.020345358935754055, "grad_norm": 2.080885648727417, "learning_rate": 6.776470588235295e-06, "loss": 0.9872, "step": 576 }, { "epoch": 0.020380680739461962, "grad_norm": 2.3530657291412354, "learning_rate": 6.788235294117648e-06, "loss": 1.0565, "step": 577 }, { "epoch": 0.020416002543169866, "grad_norm": 2.198371648788452, "learning_rate": 6.800000000000001e-06, "loss": 1.0312, "step": 578 }, { "epoch": 0.020451324346877773, "grad_norm": 2.0296480655670166, "learning_rate": 6.811764705882353e-06, "loss": 1.0222, "step": 579 }, { "epoch": 0.02048664615058568, "grad_norm": 2.111527681350708, "learning_rate": 6.8235294117647065e-06, "loss": 1.0453, "step": 580 }, { "epoch": 0.020521967954293587, "grad_norm": 2.0369014739990234, "learning_rate": 6.835294117647059e-06, "loss": 0.9946, "step": 581 }, { "epoch": 0.02055728975800149, "grad_norm": 2.1450724601745605, "learning_rate": 6.847058823529412e-06, "loss": 1.0315, "step": 582 }, { "epoch": 0.020592611561709398, "grad_norm": 1.976934790611267, "learning_rate": 6.858823529411765e-06, "loss": 0.9821, "step": 583 }, { "epoch": 0.020627933365417305, "grad_norm": 2.1058692932128906, "learning_rate": 6.8705882352941175e-06, "loss": 1.0326, "step": 584 }, { "epoch": 0.020663255169125212, "grad_norm": 2.169536590576172, "learning_rate": 6.8823529411764715e-06, "loss": 0.9896, "step": 585 }, { "epoch": 0.02069857697283312, "grad_norm": 2.118912935256958, "learning_rate": 6.894117647058825e-06, "loss": 1.0295, "step": 586 }, { "epoch": 0.020733898776541023, "grad_norm": 1.7159719467163086, "learning_rate": 6.905882352941177e-06, "loss": 0.5808, "step": 587 }, { "epoch": 0.02076922058024893, "grad_norm": 2.2175235748291016, "learning_rate": 6.91764705882353e-06, "loss": 1.0322, "step": 588 }, { "epoch": 0.020804542383956837, "grad_norm": 2.3294973373413086, "learning_rate": 6.929411764705883e-06, "loss": 1.0399, "step": 589 }, { "epoch": 0.020839864187664744, "grad_norm": 2.2231767177581787, "learning_rate": 6.941176470588236e-06, "loss": 1.0101, "step": 590 }, { "epoch": 0.020875185991372648, "grad_norm": 1.2283110618591309, "learning_rate": 6.952941176470589e-06, "loss": 0.6001, "step": 591 }, { "epoch": 0.020910507795080555, "grad_norm": 2.2132866382598877, "learning_rate": 6.964705882352941e-06, "loss": 1.0627, "step": 592 }, { "epoch": 0.020945829598788462, "grad_norm": 2.2450900077819824, "learning_rate": 6.976470588235294e-06, "loss": 1.0483, "step": 593 }, { "epoch": 0.02098115140249637, "grad_norm": 2.1578550338745117, "learning_rate": 6.9882352941176476e-06, "loss": 1.0054, "step": 594 }, { "epoch": 0.021016473206204273, "grad_norm": 2.224405288696289, "learning_rate": 7e-06, "loss": 1.0294, "step": 595 }, { "epoch": 0.02105179500991218, "grad_norm": 2.064011335372925, "learning_rate": 7.011764705882353e-06, "loss": 1.0327, "step": 596 }, { "epoch": 0.021087116813620087, "grad_norm": 2.01932430267334, "learning_rate": 7.023529411764707e-06, "loss": 0.9962, "step": 597 }, { "epoch": 0.021122438617327995, "grad_norm": 2.2953102588653564, "learning_rate": 7.0352941176470594e-06, "loss": 1.0167, "step": 598 }, { "epoch": 0.021157760421035902, "grad_norm": 2.1916589736938477, "learning_rate": 7.047058823529413e-06, "loss": 1.0259, "step": 599 }, { "epoch": 0.021193082224743805, "grad_norm": 1.98945152759552, "learning_rate": 7.058823529411766e-06, "loss": 0.9856, "step": 600 }, { "epoch": 0.021228404028451713, "grad_norm": 2.1945748329162598, "learning_rate": 7.070588235294118e-06, "loss": 1.0361, "step": 601 }, { "epoch": 0.02126372583215962, "grad_norm": 1.9742745161056519, "learning_rate": 7.082352941176471e-06, "loss": 1.0218, "step": 602 }, { "epoch": 0.021299047635867527, "grad_norm": 2.012190341949463, "learning_rate": 7.0941176470588245e-06, "loss": 1.0034, "step": 603 }, { "epoch": 0.02133436943957543, "grad_norm": 2.1530234813690186, "learning_rate": 7.105882352941177e-06, "loss": 1.0123, "step": 604 }, { "epoch": 0.021369691243283338, "grad_norm": 1.934767723083496, "learning_rate": 7.11764705882353e-06, "loss": 0.9892, "step": 605 }, { "epoch": 0.021405013046991245, "grad_norm": 2.1524105072021484, "learning_rate": 7.129411764705882e-06, "loss": 1.0008, "step": 606 }, { "epoch": 0.021440334850699152, "grad_norm": 2.2505874633789062, "learning_rate": 7.1411764705882355e-06, "loss": 1.0431, "step": 607 }, { "epoch": 0.02147565665440706, "grad_norm": 2.7865819931030273, "learning_rate": 7.1529411764705895e-06, "loss": 0.9827, "step": 608 }, { "epoch": 0.021510978458114963, "grad_norm": 2.111116647720337, "learning_rate": 7.164705882352941e-06, "loss": 1.0033, "step": 609 }, { "epoch": 0.02154630026182287, "grad_norm": 2.0736401081085205, "learning_rate": 7.176470588235295e-06, "loss": 1.0195, "step": 610 }, { "epoch": 0.021581622065530777, "grad_norm": 1.9519596099853516, "learning_rate": 7.188235294117648e-06, "loss": 1.0061, "step": 611 }, { "epoch": 0.021616943869238684, "grad_norm": 2.1942193508148193, "learning_rate": 7.2000000000000005e-06, "loss": 1.0079, "step": 612 }, { "epoch": 0.021652265672946588, "grad_norm": 2.144805908203125, "learning_rate": 7.211764705882354e-06, "loss": 0.9724, "step": 613 }, { "epoch": 0.021687587476654495, "grad_norm": 2.1938111782073975, "learning_rate": 7.223529411764707e-06, "loss": 1.0397, "step": 614 }, { "epoch": 0.021722909280362402, "grad_norm": 2.0302560329437256, "learning_rate": 7.235294117647059e-06, "loss": 1.0233, "step": 615 }, { "epoch": 0.02175823108407031, "grad_norm": 2.0127079486846924, "learning_rate": 7.247058823529412e-06, "loss": 0.9907, "step": 616 }, { "epoch": 0.021793552887778213, "grad_norm": 2.189145565032959, "learning_rate": 7.258823529411765e-06, "loss": 1.0517, "step": 617 }, { "epoch": 0.02182887469148612, "grad_norm": 2.1780571937561035, "learning_rate": 7.270588235294118e-06, "loss": 0.991, "step": 618 }, { "epoch": 0.021864196495194027, "grad_norm": 2.2299768924713135, "learning_rate": 7.282352941176472e-06, "loss": 0.9532, "step": 619 }, { "epoch": 0.021899518298901934, "grad_norm": 2.388842821121216, "learning_rate": 7.294117647058823e-06, "loss": 0.9921, "step": 620 }, { "epoch": 0.02193484010260984, "grad_norm": 2.011780261993408, "learning_rate": 7.305882352941177e-06, "loss": 1.0057, "step": 621 }, { "epoch": 0.021970161906317745, "grad_norm": 2.1733686923980713, "learning_rate": 7.317647058823531e-06, "loss": 1.0004, "step": 622 }, { "epoch": 0.022005483710025652, "grad_norm": 2.141512393951416, "learning_rate": 7.329411764705883e-06, "loss": 1.0007, "step": 623 }, { "epoch": 0.02204080551373356, "grad_norm": 2.1439383029937744, "learning_rate": 7.341176470588236e-06, "loss": 0.983, "step": 624 }, { "epoch": 0.022076127317441466, "grad_norm": 2.035336494445801, "learning_rate": 7.352941176470589e-06, "loss": 1.0056, "step": 625 }, { "epoch": 0.02211144912114937, "grad_norm": 2.062842607498169, "learning_rate": 7.364705882352942e-06, "loss": 0.973, "step": 626 }, { "epoch": 0.022146770924857277, "grad_norm": 2.0276072025299072, "learning_rate": 7.376470588235295e-06, "loss": 0.9909, "step": 627 }, { "epoch": 0.022182092728565184, "grad_norm": 2.016761302947998, "learning_rate": 7.388235294117647e-06, "loss": 0.9729, "step": 628 }, { "epoch": 0.02221741453227309, "grad_norm": 2.0364396572113037, "learning_rate": 7.4e-06, "loss": 0.9684, "step": 629 }, { "epoch": 0.022252736335980995, "grad_norm": 2.1312994956970215, "learning_rate": 7.4117647058823535e-06, "loss": 1.0857, "step": 630 }, { "epoch": 0.022288058139688902, "grad_norm": 2.216792345046997, "learning_rate": 7.423529411764706e-06, "loss": 1.0357, "step": 631 }, { "epoch": 0.02232337994339681, "grad_norm": 2.158803939819336, "learning_rate": 7.43529411764706e-06, "loss": 1.0129, "step": 632 }, { "epoch": 0.022358701747104717, "grad_norm": 2.0631678104400635, "learning_rate": 7.447058823529413e-06, "loss": 1.0053, "step": 633 }, { "epoch": 0.022394023550812624, "grad_norm": 2.2049858570098877, "learning_rate": 7.458823529411765e-06, "loss": 0.9919, "step": 634 }, { "epoch": 0.022429345354520527, "grad_norm": 1.9995439052581787, "learning_rate": 7.4705882352941185e-06, "loss": 1.004, "step": 635 }, { "epoch": 0.022464667158228434, "grad_norm": 2.1220648288726807, "learning_rate": 7.482352941176472e-06, "loss": 1.0043, "step": 636 }, { "epoch": 0.02249998896193634, "grad_norm": 2.1256046295166016, "learning_rate": 7.494117647058824e-06, "loss": 1.0402, "step": 637 }, { "epoch": 0.02253531076564425, "grad_norm": 1.9780827760696411, "learning_rate": 7.505882352941177e-06, "loss": 0.9796, "step": 638 }, { "epoch": 0.022570632569352152, "grad_norm": 1.9116226434707642, "learning_rate": 7.5176470588235295e-06, "loss": 0.9728, "step": 639 }, { "epoch": 0.02260595437306006, "grad_norm": 2.279784679412842, "learning_rate": 7.529411764705883e-06, "loss": 1.0025, "step": 640 }, { "epoch": 0.022641276176767967, "grad_norm": 2.174588680267334, "learning_rate": 7.541176470588236e-06, "loss": 1.0287, "step": 641 }, { "epoch": 0.022676597980475874, "grad_norm": 2.126497745513916, "learning_rate": 7.552941176470588e-06, "loss": 1.0175, "step": 642 }, { "epoch": 0.02271191978418378, "grad_norm": 2.2456581592559814, "learning_rate": 7.564705882352941e-06, "loss": 0.9952, "step": 643 }, { "epoch": 0.022747241587891685, "grad_norm": 1.934963345527649, "learning_rate": 7.576470588235295e-06, "loss": 0.9828, "step": 644 }, { "epoch": 0.022782563391599592, "grad_norm": 2.175466775894165, "learning_rate": 7.588235294117648e-06, "loss": 1.023, "step": 645 }, { "epoch": 0.0228178851953075, "grad_norm": 2.19199538230896, "learning_rate": 7.600000000000001e-06, "loss": 1.0089, "step": 646 }, { "epoch": 0.022853206999015406, "grad_norm": 2.1747562885284424, "learning_rate": 7.611764705882354e-06, "loss": 0.9987, "step": 647 }, { "epoch": 0.02288852880272331, "grad_norm": 2.013759136199951, "learning_rate": 7.6235294117647064e-06, "loss": 1.0263, "step": 648 }, { "epoch": 0.022923850606431217, "grad_norm": 2.094804286956787, "learning_rate": 7.63529411764706e-06, "loss": 1.0201, "step": 649 }, { "epoch": 0.022959172410139124, "grad_norm": 2.161876916885376, "learning_rate": 7.647058823529411e-06, "loss": 0.9921, "step": 650 }, { "epoch": 0.02299449421384703, "grad_norm": 2.141707181930542, "learning_rate": 7.658823529411765e-06, "loss": 0.9889, "step": 651 }, { "epoch": 0.023029816017554935, "grad_norm": 1.9643564224243164, "learning_rate": 7.670588235294119e-06, "loss": 0.9943, "step": 652 }, { "epoch": 0.023065137821262842, "grad_norm": 2.2621538639068604, "learning_rate": 7.682352941176471e-06, "loss": 0.9769, "step": 653 }, { "epoch": 0.02310045962497075, "grad_norm": 2.030285596847534, "learning_rate": 7.694117647058824e-06, "loss": 1.0058, "step": 654 }, { "epoch": 0.023135781428678656, "grad_norm": 2.226062774658203, "learning_rate": 7.705882352941178e-06, "loss": 0.9991, "step": 655 }, { "epoch": 0.023171103232386563, "grad_norm": 2.090308666229248, "learning_rate": 7.71764705882353e-06, "loss": 1.0216, "step": 656 }, { "epoch": 0.023206425036094467, "grad_norm": 2.325178384780884, "learning_rate": 7.729411764705882e-06, "loss": 1.0134, "step": 657 }, { "epoch": 0.023241746839802374, "grad_norm": 1.9827207326889038, "learning_rate": 7.741176470588237e-06, "loss": 1.0061, "step": 658 }, { "epoch": 0.02327706864351028, "grad_norm": 2.136906147003174, "learning_rate": 7.752941176470589e-06, "loss": 1.0155, "step": 659 }, { "epoch": 0.02331239044721819, "grad_norm": 2.256298542022705, "learning_rate": 7.764705882352941e-06, "loss": 1.0051, "step": 660 }, { "epoch": 0.023347712250926092, "grad_norm": 2.1198081970214844, "learning_rate": 7.776470588235294e-06, "loss": 1.0314, "step": 661 }, { "epoch": 0.023383034054634, "grad_norm": 2.1606504917144775, "learning_rate": 7.788235294117648e-06, "loss": 1.0253, "step": 662 }, { "epoch": 0.023418355858341906, "grad_norm": 1.973030924797058, "learning_rate": 7.800000000000002e-06, "loss": 0.9744, "step": 663 }, { "epoch": 0.023453677662049813, "grad_norm": 2.1570005416870117, "learning_rate": 7.811764705882354e-06, "loss": 0.9744, "step": 664 }, { "epoch": 0.02348899946575772, "grad_norm": 2.0831375122070312, "learning_rate": 7.823529411764706e-06, "loss": 0.9882, "step": 665 }, { "epoch": 0.023524321269465624, "grad_norm": 2.0311927795410156, "learning_rate": 7.83529411764706e-06, "loss": 0.9725, "step": 666 }, { "epoch": 0.02355964307317353, "grad_norm": 2.0836474895477295, "learning_rate": 7.847058823529413e-06, "loss": 1.0126, "step": 667 }, { "epoch": 0.02359496487688144, "grad_norm": 2.1976044178009033, "learning_rate": 7.858823529411765e-06, "loss": 0.9831, "step": 668 }, { "epoch": 0.023630286680589346, "grad_norm": 2.2861881256103516, "learning_rate": 7.870588235294119e-06, "loss": 1.0312, "step": 669 }, { "epoch": 0.02366560848429725, "grad_norm": 2.0949652194976807, "learning_rate": 7.882352941176471e-06, "loss": 1.0259, "step": 670 }, { "epoch": 0.023700930288005156, "grad_norm": 2.0553205013275146, "learning_rate": 7.894117647058824e-06, "loss": 1.0384, "step": 671 }, { "epoch": 0.023736252091713064, "grad_norm": 2.070258855819702, "learning_rate": 7.905882352941176e-06, "loss": 1.0096, "step": 672 }, { "epoch": 0.02377157389542097, "grad_norm": 2.392589807510376, "learning_rate": 7.91764705882353e-06, "loss": 1.023, "step": 673 }, { "epoch": 0.023806895699128874, "grad_norm": 2.297055244445801, "learning_rate": 7.929411764705884e-06, "loss": 0.987, "step": 674 }, { "epoch": 0.02384221750283678, "grad_norm": 1.942401647567749, "learning_rate": 7.941176470588236e-06, "loss": 1.0076, "step": 675 }, { "epoch": 0.02387753930654469, "grad_norm": 1.989841103553772, "learning_rate": 7.952941176470589e-06, "loss": 0.9826, "step": 676 }, { "epoch": 0.023912861110252596, "grad_norm": 2.3960578441619873, "learning_rate": 7.964705882352943e-06, "loss": 1.0323, "step": 677 }, { "epoch": 0.023948182913960503, "grad_norm": 2.1151556968688965, "learning_rate": 7.976470588235295e-06, "loss": 0.9896, "step": 678 }, { "epoch": 0.023983504717668407, "grad_norm": 2.098389148712158, "learning_rate": 7.988235294117647e-06, "loss": 1.0503, "step": 679 }, { "epoch": 0.024018826521376314, "grad_norm": 2.0715136528015137, "learning_rate": 8.000000000000001e-06, "loss": 1.0281, "step": 680 }, { "epoch": 0.02405414832508422, "grad_norm": 1.9517695903778076, "learning_rate": 8.011764705882354e-06, "loss": 0.9877, "step": 681 }, { "epoch": 0.024089470128792128, "grad_norm": 2.1994071006774902, "learning_rate": 8.023529411764706e-06, "loss": 1.0196, "step": 682 }, { "epoch": 0.02412479193250003, "grad_norm": 2.0563278198242188, "learning_rate": 8.03529411764706e-06, "loss": 1.0055, "step": 683 }, { "epoch": 0.02416011373620794, "grad_norm": 2.1584813594818115, "learning_rate": 8.047058823529412e-06, "loss": 1.0156, "step": 684 }, { "epoch": 0.024195435539915846, "grad_norm": 2.145855665206909, "learning_rate": 8.058823529411766e-06, "loss": 1.0078, "step": 685 }, { "epoch": 0.024230757343623753, "grad_norm": 2.298475742340088, "learning_rate": 8.070588235294117e-06, "loss": 1.0041, "step": 686 }, { "epoch": 0.02426607914733166, "grad_norm": 2.199794292449951, "learning_rate": 8.082352941176471e-06, "loss": 0.9866, "step": 687 }, { "epoch": 0.024301400951039564, "grad_norm": 2.0872397422790527, "learning_rate": 8.094117647058825e-06, "loss": 0.9785, "step": 688 }, { "epoch": 0.02433672275474747, "grad_norm": 2.068408250808716, "learning_rate": 8.105882352941177e-06, "loss": 1.0464, "step": 689 }, { "epoch": 0.024372044558455378, "grad_norm": 2.079374074935913, "learning_rate": 8.11764705882353e-06, "loss": 0.9847, "step": 690 }, { "epoch": 0.024407366362163285, "grad_norm": 2.005676507949829, "learning_rate": 8.129411764705884e-06, "loss": 0.9929, "step": 691 }, { "epoch": 0.02444268816587119, "grad_norm": 2.174173593521118, "learning_rate": 8.141176470588236e-06, "loss": 1.0175, "step": 692 }, { "epoch": 0.024478009969579096, "grad_norm": 2.1053643226623535, "learning_rate": 8.152941176470588e-06, "loss": 0.9686, "step": 693 }, { "epoch": 0.024513331773287003, "grad_norm": 2.101656198501587, "learning_rate": 8.164705882352942e-06, "loss": 1.012, "step": 694 }, { "epoch": 0.02454865357699491, "grad_norm": 2.0809152126312256, "learning_rate": 8.176470588235295e-06, "loss": 0.9825, "step": 695 }, { "epoch": 0.024583975380702814, "grad_norm": 2.0161430835723877, "learning_rate": 8.188235294117649e-06, "loss": 1.0273, "step": 696 }, { "epoch": 0.02461929718441072, "grad_norm": 2.1539394855499268, "learning_rate": 8.2e-06, "loss": 0.9947, "step": 697 }, { "epoch": 0.024654618988118628, "grad_norm": 2.2180817127227783, "learning_rate": 8.211764705882353e-06, "loss": 1.0046, "step": 698 }, { "epoch": 0.024689940791826535, "grad_norm": 2.00528883934021, "learning_rate": 8.223529411764707e-06, "loss": 1.0159, "step": 699 }, { "epoch": 0.024725262595534443, "grad_norm": 1.9288533926010132, "learning_rate": 8.23529411764706e-06, "loss": 0.9292, "step": 700 }, { "epoch": 0.024760584399242346, "grad_norm": 2.0121169090270996, "learning_rate": 8.247058823529412e-06, "loss": 0.9854, "step": 701 }, { "epoch": 0.024795906202950253, "grad_norm": 1.9955759048461914, "learning_rate": 8.258823529411766e-06, "loss": 1.0403, "step": 702 }, { "epoch": 0.02483122800665816, "grad_norm": 1.938691258430481, "learning_rate": 8.270588235294118e-06, "loss": 0.9971, "step": 703 }, { "epoch": 0.024866549810366068, "grad_norm": 2.313136339187622, "learning_rate": 8.28235294117647e-06, "loss": 0.9879, "step": 704 }, { "epoch": 0.02490187161407397, "grad_norm": 2.198380470275879, "learning_rate": 8.294117647058825e-06, "loss": 0.9973, "step": 705 }, { "epoch": 0.02493719341778188, "grad_norm": 2.1671557426452637, "learning_rate": 8.305882352941177e-06, "loss": 1.0191, "step": 706 }, { "epoch": 0.024972515221489786, "grad_norm": 2.193779230117798, "learning_rate": 8.31764705882353e-06, "loss": 0.9904, "step": 707 }, { "epoch": 0.025007837025197693, "grad_norm": 1.9388431310653687, "learning_rate": 8.329411764705882e-06, "loss": 1.0117, "step": 708 }, { "epoch": 0.025043158828905596, "grad_norm": 2.0575900077819824, "learning_rate": 8.341176470588236e-06, "loss": 1.0013, "step": 709 }, { "epoch": 0.025078480632613503, "grad_norm": 2.0095973014831543, "learning_rate": 8.35294117647059e-06, "loss": 0.9531, "step": 710 }, { "epoch": 0.02511380243632141, "grad_norm": 2.0857980251312256, "learning_rate": 8.364705882352942e-06, "loss": 0.9893, "step": 711 }, { "epoch": 0.025149124240029318, "grad_norm": 2.070645570755005, "learning_rate": 8.376470588235295e-06, "loss": 0.9788, "step": 712 }, { "epoch": 0.025184446043737225, "grad_norm": 2.263744831085205, "learning_rate": 8.388235294117649e-06, "loss": 1.0217, "step": 713 }, { "epoch": 0.02521976784744513, "grad_norm": 2.5736334323883057, "learning_rate": 8.400000000000001e-06, "loss": 1.0288, "step": 714 }, { "epoch": 0.025255089651153036, "grad_norm": 2.126497507095337, "learning_rate": 8.411764705882353e-06, "loss": 0.9451, "step": 715 }, { "epoch": 0.025290411454860943, "grad_norm": 2.1777989864349365, "learning_rate": 8.423529411764707e-06, "loss": 1.0259, "step": 716 }, { "epoch": 0.02532573325856885, "grad_norm": 2.276285409927368, "learning_rate": 8.43529411764706e-06, "loss": 0.9924, "step": 717 }, { "epoch": 0.025361055062276754, "grad_norm": 2.2239885330200195, "learning_rate": 8.447058823529412e-06, "loss": 0.9838, "step": 718 }, { "epoch": 0.02539637686598466, "grad_norm": 2.2291336059570312, "learning_rate": 8.458823529411764e-06, "loss": 0.9505, "step": 719 }, { "epoch": 0.025431698669692568, "grad_norm": 1.8475501537322998, "learning_rate": 8.470588235294118e-06, "loss": 1.0079, "step": 720 }, { "epoch": 0.025467020473400475, "grad_norm": 2.0887651443481445, "learning_rate": 8.482352941176472e-06, "loss": 0.9866, "step": 721 }, { "epoch": 0.025502342277108382, "grad_norm": 2.1458261013031006, "learning_rate": 8.494117647058825e-06, "loss": 1.0178, "step": 722 }, { "epoch": 0.025537664080816286, "grad_norm": 2.159850835800171, "learning_rate": 8.505882352941177e-06, "loss": 1.0, "step": 723 }, { "epoch": 0.025572985884524193, "grad_norm": 1.9374092817306519, "learning_rate": 8.517647058823531e-06, "loss": 0.9852, "step": 724 }, { "epoch": 0.0256083076882321, "grad_norm": 1.9736461639404297, "learning_rate": 8.529411764705883e-06, "loss": 0.9995, "step": 725 }, { "epoch": 0.025643629491940007, "grad_norm": 2.7430617809295654, "learning_rate": 8.541176470588236e-06, "loss": 1.0043, "step": 726 }, { "epoch": 0.02567895129564791, "grad_norm": 2.3055522441864014, "learning_rate": 8.55294117647059e-06, "loss": 0.9899, "step": 727 }, { "epoch": 0.025714273099355818, "grad_norm": 2.15338397026062, "learning_rate": 8.564705882352942e-06, "loss": 1.0144, "step": 728 }, { "epoch": 0.025749594903063725, "grad_norm": 1.9656095504760742, "learning_rate": 8.576470588235294e-06, "loss": 0.9959, "step": 729 }, { "epoch": 0.025784916706771632, "grad_norm": 2.072791576385498, "learning_rate": 8.588235294117647e-06, "loss": 1.0165, "step": 730 }, { "epoch": 0.025820238510479536, "grad_norm": 2.350372552871704, "learning_rate": 8.6e-06, "loss": 0.9938, "step": 731 }, { "epoch": 0.025855560314187443, "grad_norm": 2.0330371856689453, "learning_rate": 8.611764705882355e-06, "loss": 0.9893, "step": 732 }, { "epoch": 0.02589088211789535, "grad_norm": 2.0057387351989746, "learning_rate": 8.623529411764705e-06, "loss": 0.9631, "step": 733 }, { "epoch": 0.025926203921603257, "grad_norm": 1.6789395809173584, "learning_rate": 8.63529411764706e-06, "loss": 0.6086, "step": 734 }, { "epoch": 0.025961525725311164, "grad_norm": 2.5072109699249268, "learning_rate": 8.647058823529413e-06, "loss": 0.9961, "step": 735 }, { "epoch": 0.025996847529019068, "grad_norm": 2.5306546688079834, "learning_rate": 8.658823529411766e-06, "loss": 1.0073, "step": 736 }, { "epoch": 0.026032169332726975, "grad_norm": 2.3554232120513916, "learning_rate": 8.670588235294118e-06, "loss": 0.987, "step": 737 }, { "epoch": 0.026067491136434882, "grad_norm": 2.0120389461517334, "learning_rate": 8.682352941176472e-06, "loss": 1.022, "step": 738 }, { "epoch": 0.02610281294014279, "grad_norm": 2.1840903759002686, "learning_rate": 8.694117647058824e-06, "loss": 0.9821, "step": 739 }, { "epoch": 0.026138134743850693, "grad_norm": 2.208526134490967, "learning_rate": 8.705882352941177e-06, "loss": 0.9886, "step": 740 }, { "epoch": 0.0261734565475586, "grad_norm": 1.94496750831604, "learning_rate": 8.717647058823529e-06, "loss": 0.9811, "step": 741 }, { "epoch": 0.026208778351266507, "grad_norm": 1.9838051795959473, "learning_rate": 8.729411764705883e-06, "loss": 1.0135, "step": 742 }, { "epoch": 0.026244100154974415, "grad_norm": 2.335092067718506, "learning_rate": 8.741176470588237e-06, "loss": 1.063, "step": 743 }, { "epoch": 0.026279421958682322, "grad_norm": 2.022749423980713, "learning_rate": 8.752941176470588e-06, "loss": 0.9413, "step": 744 }, { "epoch": 0.026314743762390225, "grad_norm": 1.9587761163711548, "learning_rate": 8.764705882352942e-06, "loss": 0.9646, "step": 745 }, { "epoch": 0.026350065566098133, "grad_norm": 2.0685410499572754, "learning_rate": 8.776470588235296e-06, "loss": 0.997, "step": 746 }, { "epoch": 0.02638538736980604, "grad_norm": 2.412529706954956, "learning_rate": 8.788235294117648e-06, "loss": 1.0237, "step": 747 }, { "epoch": 0.026420709173513947, "grad_norm": 2.902406930923462, "learning_rate": 8.8e-06, "loss": 1.0131, "step": 748 }, { "epoch": 0.02645603097722185, "grad_norm": 2.035698413848877, "learning_rate": 8.811764705882354e-06, "loss": 1.0105, "step": 749 }, { "epoch": 0.026491352780929758, "grad_norm": 1.8951383829116821, "learning_rate": 8.823529411764707e-06, "loss": 0.9826, "step": 750 }, { "epoch": 0.026526674584637665, "grad_norm": 2.1077215671539307, "learning_rate": 8.835294117647059e-06, "loss": 1.0038, "step": 751 }, { "epoch": 0.026561996388345572, "grad_norm": 2.2107508182525635, "learning_rate": 8.847058823529413e-06, "loss": 1.0076, "step": 752 }, { "epoch": 0.026597318192053476, "grad_norm": 2.0170764923095703, "learning_rate": 8.858823529411765e-06, "loss": 1.0071, "step": 753 }, { "epoch": 0.026632639995761383, "grad_norm": 2.0511951446533203, "learning_rate": 8.870588235294118e-06, "loss": 0.9931, "step": 754 }, { "epoch": 0.02666796179946929, "grad_norm": 2.3721683025360107, "learning_rate": 8.88235294117647e-06, "loss": 1.0267, "step": 755 }, { "epoch": 0.026703283603177197, "grad_norm": 2.1805200576782227, "learning_rate": 8.894117647058824e-06, "loss": 1.0137, "step": 756 }, { "epoch": 0.026738605406885104, "grad_norm": 2.167468786239624, "learning_rate": 8.905882352941178e-06, "loss": 0.9788, "step": 757 }, { "epoch": 0.026773927210593008, "grad_norm": 2.2298619747161865, "learning_rate": 8.91764705882353e-06, "loss": 1.0252, "step": 758 }, { "epoch": 0.026809249014300915, "grad_norm": 2.2657902240753174, "learning_rate": 8.929411764705883e-06, "loss": 0.9938, "step": 759 }, { "epoch": 0.026844570818008822, "grad_norm": 2.1053590774536133, "learning_rate": 8.941176470588237e-06, "loss": 1.0119, "step": 760 }, { "epoch": 0.02687989262171673, "grad_norm": 2.0419652462005615, "learning_rate": 8.95294117647059e-06, "loss": 0.9754, "step": 761 }, { "epoch": 0.026915214425424633, "grad_norm": 2.0424439907073975, "learning_rate": 8.964705882352942e-06, "loss": 0.9724, "step": 762 }, { "epoch": 0.02695053622913254, "grad_norm": 2.0040054321289062, "learning_rate": 8.976470588235296e-06, "loss": 1.0051, "step": 763 }, { "epoch": 0.026985858032840447, "grad_norm": 2.3398964405059814, "learning_rate": 8.988235294117648e-06, "loss": 0.9983, "step": 764 }, { "epoch": 0.027021179836548354, "grad_norm": 1.785666584968567, "learning_rate": 9e-06, "loss": 0.6009, "step": 765 }, { "epoch": 0.02705650164025626, "grad_norm": 2.1283786296844482, "learning_rate": 9.011764705882353e-06, "loss": 1.0232, "step": 766 }, { "epoch": 0.027091823443964165, "grad_norm": 2.2327377796173096, "learning_rate": 9.023529411764707e-06, "loss": 0.9811, "step": 767 }, { "epoch": 0.027127145247672072, "grad_norm": 2.1422064304351807, "learning_rate": 9.03529411764706e-06, "loss": 1.0147, "step": 768 }, { "epoch": 0.02716246705137998, "grad_norm": 2.2404332160949707, "learning_rate": 9.047058823529413e-06, "loss": 0.9818, "step": 769 }, { "epoch": 0.027197788855087886, "grad_norm": 2.181302547454834, "learning_rate": 9.058823529411765e-06, "loss": 0.9869, "step": 770 }, { "epoch": 0.02723311065879579, "grad_norm": 2.297339677810669, "learning_rate": 9.07058823529412e-06, "loss": 0.9862, "step": 771 }, { "epoch": 0.027268432462503697, "grad_norm": 2.013246536254883, "learning_rate": 9.082352941176472e-06, "loss": 1.0002, "step": 772 }, { "epoch": 0.027303754266211604, "grad_norm": 2.1923110485076904, "learning_rate": 9.094117647058824e-06, "loss": 0.9904, "step": 773 }, { "epoch": 0.02733907606991951, "grad_norm": 2.1318089962005615, "learning_rate": 9.105882352941178e-06, "loss": 1.0157, "step": 774 }, { "epoch": 0.027374397873627415, "grad_norm": 2.12825083732605, "learning_rate": 9.11764705882353e-06, "loss": 0.9881, "step": 775 }, { "epoch": 0.027409719677335322, "grad_norm": 2.0761008262634277, "learning_rate": 9.129411764705883e-06, "loss": 1.0068, "step": 776 }, { "epoch": 0.02744504148104323, "grad_norm": 2.002040386199951, "learning_rate": 9.141176470588235e-06, "loss": 0.9705, "step": 777 }, { "epoch": 0.027480363284751137, "grad_norm": 1.9959746599197388, "learning_rate": 9.152941176470589e-06, "loss": 0.9999, "step": 778 }, { "epoch": 0.027515685088459044, "grad_norm": 2.1766512393951416, "learning_rate": 9.164705882352943e-06, "loss": 0.9946, "step": 779 }, { "epoch": 0.027551006892166947, "grad_norm": 2.005800485610962, "learning_rate": 9.176470588235294e-06, "loss": 0.9669, "step": 780 }, { "epoch": 0.027586328695874855, "grad_norm": 2.457036018371582, "learning_rate": 9.188235294117648e-06, "loss": 0.9991, "step": 781 }, { "epoch": 0.02762165049958276, "grad_norm": 2.194183826446533, "learning_rate": 9.200000000000002e-06, "loss": 0.9675, "step": 782 }, { "epoch": 0.02765697230329067, "grad_norm": 2.17456316947937, "learning_rate": 9.211764705882354e-06, "loss": 1.0016, "step": 783 }, { "epoch": 0.027692294106998572, "grad_norm": 2.0397355556488037, "learning_rate": 9.223529411764706e-06, "loss": 1.0258, "step": 784 }, { "epoch": 0.02772761591070648, "grad_norm": 2.0939981937408447, "learning_rate": 9.23529411764706e-06, "loss": 0.993, "step": 785 }, { "epoch": 0.027762937714414387, "grad_norm": 1.9285393953323364, "learning_rate": 9.247058823529413e-06, "loss": 0.9864, "step": 786 }, { "epoch": 0.027798259518122294, "grad_norm": 2.04762864112854, "learning_rate": 9.258823529411765e-06, "loss": 0.9973, "step": 787 }, { "epoch": 0.0278335813218302, "grad_norm": 1.9416786432266235, "learning_rate": 9.270588235294117e-06, "loss": 0.9741, "step": 788 }, { "epoch": 0.027868903125538105, "grad_norm": 2.006859302520752, "learning_rate": 9.282352941176471e-06, "loss": 1.0087, "step": 789 }, { "epoch": 0.027904224929246012, "grad_norm": 2.002847194671631, "learning_rate": 9.294117647058824e-06, "loss": 0.9917, "step": 790 }, { "epoch": 0.02793954673295392, "grad_norm": 2.1568222045898438, "learning_rate": 9.305882352941176e-06, "loss": 1.0272, "step": 791 }, { "epoch": 0.027974868536661826, "grad_norm": 2.002418279647827, "learning_rate": 9.31764705882353e-06, "loss": 0.9817, "step": 792 }, { "epoch": 0.02801019034036973, "grad_norm": 1.9659137725830078, "learning_rate": 9.329411764705884e-06, "loss": 0.9923, "step": 793 }, { "epoch": 0.028045512144077637, "grad_norm": 2.0747156143188477, "learning_rate": 9.341176470588236e-06, "loss": 0.9962, "step": 794 }, { "epoch": 0.028080833947785544, "grad_norm": 2.0936954021453857, "learning_rate": 9.352941176470589e-06, "loss": 1.041, "step": 795 }, { "epoch": 0.02811615575149345, "grad_norm": 2.5340447425842285, "learning_rate": 9.364705882352943e-06, "loss": 0.9605, "step": 796 }, { "epoch": 0.028151477555201355, "grad_norm": 2.2048537731170654, "learning_rate": 9.376470588235295e-06, "loss": 0.9988, "step": 797 }, { "epoch": 0.028186799358909262, "grad_norm": 2.048978328704834, "learning_rate": 9.388235294117647e-06, "loss": 1.0018, "step": 798 }, { "epoch": 0.02822212116261717, "grad_norm": 2.0799739360809326, "learning_rate": 9.4e-06, "loss": 0.987, "step": 799 }, { "epoch": 0.028257442966325076, "grad_norm": 2.023375988006592, "learning_rate": 9.411764705882354e-06, "loss": 1.0043, "step": 800 }, { "epoch": 0.028292764770032983, "grad_norm": 2.0257933139801025, "learning_rate": 9.423529411764706e-06, "loss": 0.9842, "step": 801 }, { "epoch": 0.028328086573740887, "grad_norm": 2.270951986312866, "learning_rate": 9.435294117647058e-06, "loss": 1.0193, "step": 802 }, { "epoch": 0.028363408377448794, "grad_norm": 2.1870880126953125, "learning_rate": 9.447058823529412e-06, "loss": 1.0713, "step": 803 }, { "epoch": 0.0283987301811567, "grad_norm": 2.5287065505981445, "learning_rate": 9.458823529411767e-06, "loss": 0.9882, "step": 804 }, { "epoch": 0.02843405198486461, "grad_norm": 2.233236789703369, "learning_rate": 9.470588235294119e-06, "loss": 1.0243, "step": 805 }, { "epoch": 0.028469373788572512, "grad_norm": 1.9494956731796265, "learning_rate": 9.482352941176471e-06, "loss": 0.9623, "step": 806 }, { "epoch": 0.02850469559228042, "grad_norm": 2.1593713760375977, "learning_rate": 9.494117647058825e-06, "loss": 0.9834, "step": 807 }, { "epoch": 0.028540017395988326, "grad_norm": 2.1133525371551514, "learning_rate": 9.505882352941178e-06, "loss": 0.9915, "step": 808 }, { "epoch": 0.028575339199696233, "grad_norm": 2.114738941192627, "learning_rate": 9.51764705882353e-06, "loss": 0.9745, "step": 809 }, { "epoch": 0.028610661003404137, "grad_norm": 2.049543619155884, "learning_rate": 9.529411764705882e-06, "loss": 0.9877, "step": 810 }, { "epoch": 0.028645982807112044, "grad_norm": 1.9791558980941772, "learning_rate": 9.541176470588236e-06, "loss": 0.989, "step": 811 }, { "epoch": 0.02868130461081995, "grad_norm": 2.1868298053741455, "learning_rate": 9.552941176470589e-06, "loss": 0.9969, "step": 812 }, { "epoch": 0.02871662641452786, "grad_norm": 2.0764620304107666, "learning_rate": 9.564705882352941e-06, "loss": 0.9637, "step": 813 }, { "epoch": 0.028751948218235766, "grad_norm": 1.8099284172058105, "learning_rate": 9.576470588235295e-06, "loss": 0.994, "step": 814 }, { "epoch": 0.02878727002194367, "grad_norm": 2.161543846130371, "learning_rate": 9.588235294117649e-06, "loss": 1.0258, "step": 815 }, { "epoch": 0.028822591825651576, "grad_norm": 1.907556176185608, "learning_rate": 9.600000000000001e-06, "loss": 0.9789, "step": 816 }, { "epoch": 0.028857913629359484, "grad_norm": 2.0447444915771484, "learning_rate": 9.611764705882354e-06, "loss": 0.9865, "step": 817 }, { "epoch": 0.02889323543306739, "grad_norm": 2.0145885944366455, "learning_rate": 9.623529411764708e-06, "loss": 1.0137, "step": 818 }, { "epoch": 0.028928557236775294, "grad_norm": 2.0979883670806885, "learning_rate": 9.63529411764706e-06, "loss": 0.9977, "step": 819 }, { "epoch": 0.0289638790404832, "grad_norm": 2.0467302799224854, "learning_rate": 9.647058823529412e-06, "loss": 0.9673, "step": 820 }, { "epoch": 0.02899920084419111, "grad_norm": 1.884478211402893, "learning_rate": 9.658823529411766e-06, "loss": 0.9744, "step": 821 }, { "epoch": 0.029034522647899016, "grad_norm": 1.6658234596252441, "learning_rate": 9.670588235294119e-06, "loss": 0.6028, "step": 822 }, { "epoch": 0.029069844451606923, "grad_norm": 2.1594345569610596, "learning_rate": 9.682352941176471e-06, "loss": 0.979, "step": 823 }, { "epoch": 0.029105166255314827, "grad_norm": 2.421325206756592, "learning_rate": 9.694117647058823e-06, "loss": 0.9926, "step": 824 }, { "epoch": 0.029140488059022734, "grad_norm": 2.0061967372894287, "learning_rate": 9.705882352941177e-06, "loss": 0.9655, "step": 825 }, { "epoch": 0.02917580986273064, "grad_norm": 1.9647564888000488, "learning_rate": 9.717647058823531e-06, "loss": 1.0041, "step": 826 }, { "epoch": 0.029211131666438548, "grad_norm": 1.9108964204788208, "learning_rate": 9.729411764705882e-06, "loss": 0.9999, "step": 827 }, { "epoch": 0.02924645347014645, "grad_norm": 2.3224916458129883, "learning_rate": 9.741176470588236e-06, "loss": 0.945, "step": 828 }, { "epoch": 0.02928177527385436, "grad_norm": 2.0473310947418213, "learning_rate": 9.75294117647059e-06, "loss": 0.9805, "step": 829 }, { "epoch": 0.029317097077562266, "grad_norm": 2.1996638774871826, "learning_rate": 9.764705882352942e-06, "loss": 1.0021, "step": 830 }, { "epoch": 0.029352418881270173, "grad_norm": 2.1778342723846436, "learning_rate": 9.776470588235295e-06, "loss": 1.0082, "step": 831 }, { "epoch": 0.029387740684978077, "grad_norm": 2.0937511920928955, "learning_rate": 9.788235294117649e-06, "loss": 0.9806, "step": 832 }, { "epoch": 0.029423062488685984, "grad_norm": 2.4341790676116943, "learning_rate": 9.800000000000001e-06, "loss": 0.9672, "step": 833 }, { "epoch": 0.02945838429239389, "grad_norm": 2.0848848819732666, "learning_rate": 9.811764705882353e-06, "loss": 0.9506, "step": 834 }, { "epoch": 0.029493706096101798, "grad_norm": 2.0325379371643066, "learning_rate": 9.823529411764706e-06, "loss": 0.9852, "step": 835 }, { "epoch": 0.029529027899809705, "grad_norm": 2.134950876235962, "learning_rate": 9.83529411764706e-06, "loss": 0.9776, "step": 836 }, { "epoch": 0.02956434970351761, "grad_norm": 1.8029638528823853, "learning_rate": 9.847058823529412e-06, "loss": 0.9598, "step": 837 }, { "epoch": 0.029599671507225516, "grad_norm": 2.356480836868286, "learning_rate": 9.858823529411764e-06, "loss": 0.9702, "step": 838 }, { "epoch": 0.029634993310933423, "grad_norm": 1.9825215339660645, "learning_rate": 9.870588235294118e-06, "loss": 0.9745, "step": 839 }, { "epoch": 0.02967031511464133, "grad_norm": 2.0214216709136963, "learning_rate": 9.882352941176472e-06, "loss": 1.0291, "step": 840 }, { "epoch": 0.029705636918349234, "grad_norm": 1.9091798067092896, "learning_rate": 9.894117647058825e-06, "loss": 0.9673, "step": 841 }, { "epoch": 0.02974095872205714, "grad_norm": 2.0131757259368896, "learning_rate": 9.905882352941177e-06, "loss": 1.0013, "step": 842 }, { "epoch": 0.02977628052576505, "grad_norm": 1.9865522384643555, "learning_rate": 9.917647058823531e-06, "loss": 0.9947, "step": 843 }, { "epoch": 0.029811602329472955, "grad_norm": 2.0340702533721924, "learning_rate": 9.929411764705883e-06, "loss": 0.9445, "step": 844 }, { "epoch": 0.029846924133180863, "grad_norm": 1.864653468132019, "learning_rate": 9.941176470588236e-06, "loss": 0.9697, "step": 845 }, { "epoch": 0.029882245936888766, "grad_norm": 2.1321310997009277, "learning_rate": 9.952941176470588e-06, "loss": 1.0277, "step": 846 }, { "epoch": 0.029917567740596673, "grad_norm": 2.132591724395752, "learning_rate": 9.964705882352942e-06, "loss": 0.981, "step": 847 }, { "epoch": 0.02995288954430458, "grad_norm": 2.0132322311401367, "learning_rate": 9.976470588235294e-06, "loss": 0.9547, "step": 848 }, { "epoch": 0.029988211348012488, "grad_norm": 2.0054287910461426, "learning_rate": 9.988235294117647e-06, "loss": 0.9823, "step": 849 }, { "epoch": 0.03002353315172039, "grad_norm": 2.003511905670166, "learning_rate": 1e-05, "loss": 0.9655, "step": 850 }, { "epoch": 0.0300588549554283, "grad_norm": 1.9812480211257935, "learning_rate": 9.999999967280472e-06, "loss": 0.9704, "step": 851 }, { "epoch": 0.030094176759136206, "grad_norm": 2.15525221824646, "learning_rate": 9.99999986912188e-06, "loss": 0.992, "step": 852 }, { "epoch": 0.030129498562844113, "grad_norm": 2.097170114517212, "learning_rate": 9.999999705524233e-06, "loss": 1.0063, "step": 853 }, { "epoch": 0.030164820366552016, "grad_norm": 2.0987653732299805, "learning_rate": 9.999999476487529e-06, "loss": 0.9723, "step": 854 }, { "epoch": 0.030200142170259923, "grad_norm": 2.1141161918640137, "learning_rate": 9.999999182011771e-06, "loss": 0.9895, "step": 855 }, { "epoch": 0.03023546397396783, "grad_norm": 2.2219388484954834, "learning_rate": 9.999998822096963e-06, "loss": 0.9939, "step": 856 }, { "epoch": 0.030270785777675738, "grad_norm": 2.279585599899292, "learning_rate": 9.999998396743113e-06, "loss": 1.004, "step": 857 }, { "epoch": 0.030306107581383645, "grad_norm": 1.9178402423858643, "learning_rate": 9.999997905950222e-06, "loss": 0.966, "step": 858 }, { "epoch": 0.03034142938509155, "grad_norm": 1.9718307256698608, "learning_rate": 9.999997349718297e-06, "loss": 0.9826, "step": 859 }, { "epoch": 0.030376751188799456, "grad_norm": 2.016958475112915, "learning_rate": 9.999996728047349e-06, "loss": 1.006, "step": 860 }, { "epoch": 0.030412072992507363, "grad_norm": 1.9763656854629517, "learning_rate": 9.999996040937383e-06, "loss": 0.9467, "step": 861 }, { "epoch": 0.03044739479621527, "grad_norm": 2.11072039604187, "learning_rate": 9.999995288388408e-06, "loss": 1.0345, "step": 862 }, { "epoch": 0.030482716599923174, "grad_norm": 2.1916773319244385, "learning_rate": 9.999994470400436e-06, "loss": 0.9647, "step": 863 }, { "epoch": 0.03051803840363108, "grad_norm": 2.1033058166503906, "learning_rate": 9.999993586973475e-06, "loss": 0.9857, "step": 864 }, { "epoch": 0.030553360207338988, "grad_norm": 2.1276276111602783, "learning_rate": 9.999992638107538e-06, "loss": 1.0075, "step": 865 }, { "epoch": 0.030588682011046895, "grad_norm": 2.0130574703216553, "learning_rate": 9.999991623802638e-06, "loss": 0.989, "step": 866 }, { "epoch": 0.030624003814754802, "grad_norm": 2.1902413368225098, "learning_rate": 9.999990544058786e-06, "loss": 0.9822, "step": 867 }, { "epoch": 0.030659325618462706, "grad_norm": 3.493767261505127, "learning_rate": 9.999989398876e-06, "loss": 0.6813, "step": 868 }, { "epoch": 0.030694647422170613, "grad_norm": 2.369807243347168, "learning_rate": 9.99998818825429e-06, "loss": 0.989, "step": 869 }, { "epoch": 0.03072996922587852, "grad_norm": 2.220041036605835, "learning_rate": 9.999986912193675e-06, "loss": 0.9828, "step": 870 }, { "epoch": 0.030765291029586427, "grad_norm": 1.7968559265136719, "learning_rate": 9.999985570694172e-06, "loss": 0.9502, "step": 871 }, { "epoch": 0.03080061283329433, "grad_norm": 2.416626214981079, "learning_rate": 9.999984163755798e-06, "loss": 1.006, "step": 872 }, { "epoch": 0.030835934637002238, "grad_norm": 2.1176562309265137, "learning_rate": 9.999982691378571e-06, "loss": 0.9781, "step": 873 }, { "epoch": 0.030871256440710145, "grad_norm": 1.9388023614883423, "learning_rate": 9.99998115356251e-06, "loss": 0.9501, "step": 874 }, { "epoch": 0.030906578244418052, "grad_norm": 2.0678250789642334, "learning_rate": 9.999979550307636e-06, "loss": 0.9705, "step": 875 }, { "epoch": 0.030941900048125956, "grad_norm": 2.1069865226745605, "learning_rate": 9.99997788161397e-06, "loss": 1.0039, "step": 876 }, { "epoch": 0.030977221851833863, "grad_norm": 1.8703721761703491, "learning_rate": 9.999976147481531e-06, "loss": 0.9647, "step": 877 }, { "epoch": 0.03101254365554177, "grad_norm": 1.812330722808838, "learning_rate": 9.999974347910346e-06, "loss": 0.994, "step": 878 }, { "epoch": 0.031047865459249677, "grad_norm": 1.9950008392333984, "learning_rate": 9.999972482900438e-06, "loss": 0.9766, "step": 879 }, { "epoch": 0.031083187262957584, "grad_norm": 2.4430506229400635, "learning_rate": 9.999970552451828e-06, "loss": 0.9538, "step": 880 }, { "epoch": 0.031118509066665488, "grad_norm": 2.1457056999206543, "learning_rate": 9.999968556564543e-06, "loss": 1.0222, "step": 881 }, { "epoch": 0.031153830870373395, "grad_norm": 2.1483609676361084, "learning_rate": 9.999966495238612e-06, "loss": 0.9783, "step": 882 }, { "epoch": 0.031189152674081302, "grad_norm": 1.9248230457305908, "learning_rate": 9.999964368474056e-06, "loss": 0.9582, "step": 883 }, { "epoch": 0.03122447447778921, "grad_norm": 2.0874650478363037, "learning_rate": 9.999962176270907e-06, "loss": 0.9692, "step": 884 }, { "epoch": 0.03125979628149712, "grad_norm": 2.1138198375701904, "learning_rate": 9.999959918629193e-06, "loss": 1.0114, "step": 885 }, { "epoch": 0.03129511808520502, "grad_norm": 2.0106379985809326, "learning_rate": 9.999957595548946e-06, "loss": 0.9971, "step": 886 }, { "epoch": 0.031330439888912924, "grad_norm": 1.9059561491012573, "learning_rate": 9.999955207030191e-06, "loss": 0.9562, "step": 887 }, { "epoch": 0.031365761692620835, "grad_norm": 1.9141288995742798, "learning_rate": 9.999952753072964e-06, "loss": 0.9873, "step": 888 }, { "epoch": 0.03140108349632874, "grad_norm": 2.0713417530059814, "learning_rate": 9.999950233677295e-06, "loss": 0.9795, "step": 889 }, { "epoch": 0.03143640530003665, "grad_norm": 1.9465662240982056, "learning_rate": 9.999947648843217e-06, "loss": 0.9905, "step": 890 }, { "epoch": 0.03147172710374455, "grad_norm": 1.8682918548583984, "learning_rate": 9.999944998570763e-06, "loss": 0.9805, "step": 891 }, { "epoch": 0.031507048907452456, "grad_norm": 2.0127651691436768, "learning_rate": 9.999942282859968e-06, "loss": 0.9756, "step": 892 }, { "epoch": 0.03154237071116037, "grad_norm": 1.9756802320480347, "learning_rate": 9.999939501710871e-06, "loss": 0.9831, "step": 893 }, { "epoch": 0.03157769251486827, "grad_norm": 2.137294292449951, "learning_rate": 9.999936655123505e-06, "loss": 0.9512, "step": 894 }, { "epoch": 0.03161301431857618, "grad_norm": 2.2593390941619873, "learning_rate": 9.999933743097908e-06, "loss": 1.006, "step": 895 }, { "epoch": 0.031648336122284085, "grad_norm": 2.0855815410614014, "learning_rate": 9.999930765634117e-06, "loss": 0.9845, "step": 896 }, { "epoch": 0.03168365792599199, "grad_norm": 2.127829074859619, "learning_rate": 9.999927722732173e-06, "loss": 0.9869, "step": 897 }, { "epoch": 0.0317189797296999, "grad_norm": 2.10066819190979, "learning_rate": 9.999924614392114e-06, "loss": 1.0176, "step": 898 }, { "epoch": 0.0317543015334078, "grad_norm": 1.8728845119476318, "learning_rate": 9.999921440613982e-06, "loss": 0.9179, "step": 899 }, { "epoch": 0.031789623337115706, "grad_norm": 1.9281138181686401, "learning_rate": 9.999918201397818e-06, "loss": 1.0081, "step": 900 }, { "epoch": 0.03182494514082362, "grad_norm": 1.9695630073547363, "learning_rate": 9.999914896743665e-06, "loss": 0.9863, "step": 901 }, { "epoch": 0.03186026694453152, "grad_norm": 2.0245678424835205, "learning_rate": 9.999911526651565e-06, "loss": 0.9758, "step": 902 }, { "epoch": 0.03189558874823943, "grad_norm": 2.097132682800293, "learning_rate": 9.999908091121562e-06, "loss": 1.0124, "step": 903 }, { "epoch": 0.031930910551947335, "grad_norm": 2.086601495742798, "learning_rate": 9.999904590153702e-06, "loss": 1.0004, "step": 904 }, { "epoch": 0.03196623235565524, "grad_norm": 2.1523964405059814, "learning_rate": 9.999901023748032e-06, "loss": 0.9848, "step": 905 }, { "epoch": 0.03200155415936315, "grad_norm": 1.9178029298782349, "learning_rate": 9.999897391904597e-06, "loss": 0.9542, "step": 906 }, { "epoch": 0.03203687596307105, "grad_norm": 2.1478028297424316, "learning_rate": 9.999893694623444e-06, "loss": 1.0257, "step": 907 }, { "epoch": 0.03207219776677896, "grad_norm": 1.9098460674285889, "learning_rate": 9.999889931904622e-06, "loss": 0.9543, "step": 908 }, { "epoch": 0.03210751957048687, "grad_norm": 1.9795230627059937, "learning_rate": 9.999886103748182e-06, "loss": 0.9638, "step": 909 }, { "epoch": 0.03214284137419477, "grad_norm": 2.0591280460357666, "learning_rate": 9.999882210154173e-06, "loss": 0.9478, "step": 910 }, { "epoch": 0.03217816317790268, "grad_norm": 2.049661874771118, "learning_rate": 9.999878251122643e-06, "loss": 0.9913, "step": 911 }, { "epoch": 0.032213484981610585, "grad_norm": 2.100139617919922, "learning_rate": 9.999874226653648e-06, "loss": 0.9904, "step": 912 }, { "epoch": 0.032248806785318496, "grad_norm": 2.134187936782837, "learning_rate": 9.999870136747239e-06, "loss": 0.965, "step": 913 }, { "epoch": 0.0322841285890264, "grad_norm": 2.0058486461639404, "learning_rate": 9.999865981403467e-06, "loss": 0.9623, "step": 914 }, { "epoch": 0.0323194503927343, "grad_norm": 1.8415902853012085, "learning_rate": 9.999861760622392e-06, "loss": 0.9767, "step": 915 }, { "epoch": 0.032354772196442214, "grad_norm": 2.083472967147827, "learning_rate": 9.999857474404065e-06, "loss": 1.029, "step": 916 }, { "epoch": 0.03239009400015012, "grad_norm": 2.100424289703369, "learning_rate": 9.999853122748542e-06, "loss": 0.9857, "step": 917 }, { "epoch": 0.03242541580385802, "grad_norm": 2.2422239780426025, "learning_rate": 9.999848705655884e-06, "loss": 0.9906, "step": 918 }, { "epoch": 0.03246073760756593, "grad_norm": 2.034264087677002, "learning_rate": 9.999844223126143e-06, "loss": 0.9836, "step": 919 }, { "epoch": 0.032496059411273835, "grad_norm": 1.9874385595321655, "learning_rate": 9.999839675159383e-06, "loss": 0.9822, "step": 920 }, { "epoch": 0.032531381214981746, "grad_norm": 1.991052269935608, "learning_rate": 9.99983506175566e-06, "loss": 0.9468, "step": 921 }, { "epoch": 0.03256670301868965, "grad_norm": 1.9591878652572632, "learning_rate": 9.999830382915036e-06, "loss": 0.9637, "step": 922 }, { "epoch": 0.03260202482239755, "grad_norm": 2.143686294555664, "learning_rate": 9.999825638637572e-06, "loss": 0.9809, "step": 923 }, { "epoch": 0.032637346626105464, "grad_norm": 2.54646372795105, "learning_rate": 9.99982082892333e-06, "loss": 1.0, "step": 924 }, { "epoch": 0.03267266842981337, "grad_norm": 2.093919515609741, "learning_rate": 9.999815953772373e-06, "loss": 0.9816, "step": 925 }, { "epoch": 0.03270799023352128, "grad_norm": 1.9444583654403687, "learning_rate": 9.999811013184763e-06, "loss": 0.9764, "step": 926 }, { "epoch": 0.03274331203722918, "grad_norm": 2.039036750793457, "learning_rate": 9.99980600716057e-06, "loss": 1.0295, "step": 927 }, { "epoch": 0.032778633840937085, "grad_norm": 1.9413032531738281, "learning_rate": 9.999800935699852e-06, "loss": 1.0198, "step": 928 }, { "epoch": 0.032813955644644996, "grad_norm": 2.17840576171875, "learning_rate": 9.999795798802681e-06, "loss": 0.9939, "step": 929 }, { "epoch": 0.0328492774483529, "grad_norm": 2.021207571029663, "learning_rate": 9.99979059646912e-06, "loss": 0.9943, "step": 930 }, { "epoch": 0.0328845992520608, "grad_norm": 1.8876856565475464, "learning_rate": 9.999785328699242e-06, "loss": 0.9705, "step": 931 }, { "epoch": 0.032919921055768714, "grad_norm": 2.003349542617798, "learning_rate": 9.999779995493113e-06, "loss": 1.0141, "step": 932 }, { "epoch": 0.03295524285947662, "grad_norm": 2.0528066158294678, "learning_rate": 9.999774596850805e-06, "loss": 0.9697, "step": 933 }, { "epoch": 0.03299056466318453, "grad_norm": 1.8142645359039307, "learning_rate": 9.999769132772383e-06, "loss": 0.9365, "step": 934 }, { "epoch": 0.03302588646689243, "grad_norm": 1.913378119468689, "learning_rate": 9.999763603257926e-06, "loss": 0.9468, "step": 935 }, { "epoch": 0.033061208270600335, "grad_norm": 2.022690773010254, "learning_rate": 9.999758008307501e-06, "loss": 0.9776, "step": 936 }, { "epoch": 0.033096530074308246, "grad_norm": 2.092421054840088, "learning_rate": 9.999752347921183e-06, "loss": 0.9575, "step": 937 }, { "epoch": 0.03313185187801615, "grad_norm": 2.047075033187866, "learning_rate": 9.999746622099046e-06, "loss": 0.9941, "step": 938 }, { "epoch": 0.03316717368172406, "grad_norm": 1.9745863676071167, "learning_rate": 9.999740830841167e-06, "loss": 0.9613, "step": 939 }, { "epoch": 0.033202495485431964, "grad_norm": 2.0291786193847656, "learning_rate": 9.999734974147619e-06, "loss": 0.9595, "step": 940 }, { "epoch": 0.03323781728913987, "grad_norm": 2.225315570831299, "learning_rate": 9.999729052018478e-06, "loss": 1.0254, "step": 941 }, { "epoch": 0.03327313909284778, "grad_norm": 1.9808237552642822, "learning_rate": 9.999723064453825e-06, "loss": 0.9734, "step": 942 }, { "epoch": 0.03330846089655568, "grad_norm": 1.9938815832138062, "learning_rate": 9.999717011453735e-06, "loss": 0.9923, "step": 943 }, { "epoch": 0.033343782700263586, "grad_norm": 1.9923611879348755, "learning_rate": 9.999710893018289e-06, "loss": 0.9718, "step": 944 }, { "epoch": 0.033379104503971496, "grad_norm": 2.063457489013672, "learning_rate": 9.999704709147567e-06, "loss": 0.9922, "step": 945 }, { "epoch": 0.0334144263076794, "grad_norm": 1.851140022277832, "learning_rate": 9.999698459841649e-06, "loss": 0.948, "step": 946 }, { "epoch": 0.03344974811138731, "grad_norm": 2.1766774654388428, "learning_rate": 9.999692145100618e-06, "loss": 0.9909, "step": 947 }, { "epoch": 0.033485069915095214, "grad_norm": 2.0611135959625244, "learning_rate": 9.999685764924556e-06, "loss": 0.9845, "step": 948 }, { "epoch": 0.03352039171880312, "grad_norm": 1.9852503538131714, "learning_rate": 9.999679319313546e-06, "loss": 0.9626, "step": 949 }, { "epoch": 0.03355571352251103, "grad_norm": 2.125133514404297, "learning_rate": 9.999672808267673e-06, "loss": 0.973, "step": 950 }, { "epoch": 0.03359103532621893, "grad_norm": 2.0114898681640625, "learning_rate": 9.999666231787023e-06, "loss": 0.9533, "step": 951 }, { "epoch": 0.03362635712992684, "grad_norm": 2.1008095741271973, "learning_rate": 9.999659589871679e-06, "loss": 0.9827, "step": 952 }, { "epoch": 0.033661678933634746, "grad_norm": 2.1417622566223145, "learning_rate": 9.999652882521733e-06, "loss": 0.9805, "step": 953 }, { "epoch": 0.03369700073734265, "grad_norm": 2.224339008331299, "learning_rate": 9.99964610973727e-06, "loss": 0.9759, "step": 954 }, { "epoch": 0.03373232254105056, "grad_norm": 2.1842312812805176, "learning_rate": 9.999639271518375e-06, "loss": 0.9388, "step": 955 }, { "epoch": 0.033767644344758464, "grad_norm": 2.4727632999420166, "learning_rate": 9.999632367865144e-06, "loss": 0.642, "step": 956 }, { "epoch": 0.033802966148466375, "grad_norm": 2.108213424682617, "learning_rate": 9.999625398777663e-06, "loss": 0.9967, "step": 957 }, { "epoch": 0.03383828795217428, "grad_norm": 2.2936644554138184, "learning_rate": 9.999618364256025e-06, "loss": 0.984, "step": 958 }, { "epoch": 0.03387360975588218, "grad_norm": 1.9192525148391724, "learning_rate": 9.999611264300322e-06, "loss": 0.9803, "step": 959 }, { "epoch": 0.03390893155959009, "grad_norm": 1.8847893476486206, "learning_rate": 9.999604098910647e-06, "loss": 0.9443, "step": 960 }, { "epoch": 0.033944253363297996, "grad_norm": 1.8614623546600342, "learning_rate": 9.999596868087092e-06, "loss": 0.9557, "step": 961 }, { "epoch": 0.0339795751670059, "grad_norm": 2.2502472400665283, "learning_rate": 9.999589571829754e-06, "loss": 0.9703, "step": 962 }, { "epoch": 0.03401489697071381, "grad_norm": 2.017230987548828, "learning_rate": 9.999582210138727e-06, "loss": 0.9957, "step": 963 }, { "epoch": 0.034050218774421714, "grad_norm": 2.3891942501068115, "learning_rate": 9.99957478301411e-06, "loss": 0.974, "step": 964 }, { "epoch": 0.034085540578129625, "grad_norm": 2.0863687992095947, "learning_rate": 9.999567290455995e-06, "loss": 0.9876, "step": 965 }, { "epoch": 0.03412086238183753, "grad_norm": 2.1480560302734375, "learning_rate": 9.999559732464485e-06, "loss": 1.0067, "step": 966 }, { "epoch": 0.03415618418554543, "grad_norm": 1.9415192604064941, "learning_rate": 9.999552109039676e-06, "loss": 0.9484, "step": 967 }, { "epoch": 0.03419150598925334, "grad_norm": 2.140052556991577, "learning_rate": 9.999544420181668e-06, "loss": 1.0029, "step": 968 }, { "epoch": 0.03422682779296125, "grad_norm": 2.0475194454193115, "learning_rate": 9.999536665890565e-06, "loss": 0.9881, "step": 969 }, { "epoch": 0.03426214959666916, "grad_norm": 1.9578086137771606, "learning_rate": 9.999528846166464e-06, "loss": 0.9898, "step": 970 }, { "epoch": 0.03429747140037706, "grad_norm": 1.882225513458252, "learning_rate": 9.99952096100947e-06, "loss": 0.9456, "step": 971 }, { "epoch": 0.034332793204084965, "grad_norm": 2.6659798622131348, "learning_rate": 9.999513010419686e-06, "loss": 0.9619, "step": 972 }, { "epoch": 0.034368115007792875, "grad_norm": 1.7186172008514404, "learning_rate": 9.999504994397213e-06, "loss": 0.9634, "step": 973 }, { "epoch": 0.03440343681150078, "grad_norm": 1.967301607131958, "learning_rate": 9.999496912942162e-06, "loss": 0.9794, "step": 974 }, { "epoch": 0.03443875861520868, "grad_norm": 1.8881124258041382, "learning_rate": 9.999488766054633e-06, "loss": 0.9572, "step": 975 }, { "epoch": 0.03447408041891659, "grad_norm": 2.1222047805786133, "learning_rate": 9.999480553734735e-06, "loss": 0.9997, "step": 976 }, { "epoch": 0.0345094022226245, "grad_norm": 2.083186626434326, "learning_rate": 9.999472275982575e-06, "loss": 0.9596, "step": 977 }, { "epoch": 0.03454472402633241, "grad_norm": 1.9912992715835571, "learning_rate": 9.999463932798264e-06, "loss": 0.9598, "step": 978 }, { "epoch": 0.03458004583004031, "grad_norm": 1.795654535293579, "learning_rate": 9.999455524181906e-06, "loss": 0.9659, "step": 979 }, { "epoch": 0.034615367633748215, "grad_norm": 1.9562902450561523, "learning_rate": 9.999447050133615e-06, "loss": 0.9929, "step": 980 }, { "epoch": 0.034650689437456125, "grad_norm": 2.153080940246582, "learning_rate": 9.9994385106535e-06, "loss": 1.0116, "step": 981 }, { "epoch": 0.03468601124116403, "grad_norm": 2.001943349838257, "learning_rate": 9.999429905741675e-06, "loss": 1.0308, "step": 982 }, { "epoch": 0.03472133304487194, "grad_norm": 1.9314526319503784, "learning_rate": 9.99942123539825e-06, "loss": 1.005, "step": 983 }, { "epoch": 0.03475665484857984, "grad_norm": 1.8131319284439087, "learning_rate": 9.999412499623342e-06, "loss": 0.9223, "step": 984 }, { "epoch": 0.03479197665228775, "grad_norm": 2.3396222591400146, "learning_rate": 9.99940369841706e-06, "loss": 0.9947, "step": 985 }, { "epoch": 0.03482729845599566, "grad_norm": 2.0631160736083984, "learning_rate": 9.999394831779523e-06, "loss": 0.979, "step": 986 }, { "epoch": 0.03486262025970356, "grad_norm": 2.0379040241241455, "learning_rate": 9.999385899710848e-06, "loss": 0.9391, "step": 987 }, { "epoch": 0.034897942063411465, "grad_norm": 2.037989616394043, "learning_rate": 9.999376902211147e-06, "loss": 0.9523, "step": 988 }, { "epoch": 0.034933263867119375, "grad_norm": 1.5646812915802002, "learning_rate": 9.999367839280543e-06, "loss": 0.5732, "step": 989 }, { "epoch": 0.03496858567082728, "grad_norm": 2.2135114669799805, "learning_rate": 9.999358710919152e-06, "loss": 0.976, "step": 990 }, { "epoch": 0.03500390747453519, "grad_norm": 1.8950551748275757, "learning_rate": 9.999349517127096e-06, "loss": 0.9711, "step": 991 }, { "epoch": 0.03503922927824309, "grad_norm": 1.1837185621261597, "learning_rate": 9.999340257904489e-06, "loss": 0.5889, "step": 992 }, { "epoch": 0.035074551081951, "grad_norm": 2.3999712467193604, "learning_rate": 9.99933093325146e-06, "loss": 0.9743, "step": 993 }, { "epoch": 0.03510987288565891, "grad_norm": 2.0829215049743652, "learning_rate": 9.999321543168126e-06, "loss": 1.0007, "step": 994 }, { "epoch": 0.03514519468936681, "grad_norm": 2.256322145462036, "learning_rate": 9.999312087654612e-06, "loss": 0.9413, "step": 995 }, { "epoch": 0.03518051649307472, "grad_norm": 2.052353620529175, "learning_rate": 9.99930256671104e-06, "loss": 0.962, "step": 996 }, { "epoch": 0.035215838296782626, "grad_norm": 2.3949944972991943, "learning_rate": 9.999292980337538e-06, "loss": 0.9494, "step": 997 }, { "epoch": 0.03525116010049053, "grad_norm": 2.5379226207733154, "learning_rate": 9.999283328534227e-06, "loss": 0.9835, "step": 998 }, { "epoch": 0.03528648190419844, "grad_norm": 2.095229387283325, "learning_rate": 9.999273611301237e-06, "loss": 0.9903, "step": 999 }, { "epoch": 0.035321803707906343, "grad_norm": 1.9829288721084595, "learning_rate": 9.999263828638693e-06, "loss": 0.9662, "step": 1000 }, { "epoch": 0.03535712551161425, "grad_norm": 1.8698885440826416, "learning_rate": 9.999253980546723e-06, "loss": 0.9457, "step": 1001 }, { "epoch": 0.03539244731532216, "grad_norm": 2.250016450881958, "learning_rate": 9.999244067025459e-06, "loss": 0.961, "step": 1002 }, { "epoch": 0.03542776911903006, "grad_norm": 2.003608465194702, "learning_rate": 9.999234088075027e-06, "loss": 0.9686, "step": 1003 }, { "epoch": 0.03546309092273797, "grad_norm": 1.9407275915145874, "learning_rate": 9.99922404369556e-06, "loss": 0.9768, "step": 1004 }, { "epoch": 0.035498412726445876, "grad_norm": 2.0147554874420166, "learning_rate": 9.999213933887187e-06, "loss": 1.0158, "step": 1005 }, { "epoch": 0.03553373453015378, "grad_norm": 1.9081166982650757, "learning_rate": 9.999203758650042e-06, "loss": 0.9596, "step": 1006 }, { "epoch": 0.03556905633386169, "grad_norm": 1.8294929265975952, "learning_rate": 9.999193517984257e-06, "loss": 0.9547, "step": 1007 }, { "epoch": 0.035604378137569594, "grad_norm": 1.8653923273086548, "learning_rate": 9.999183211889968e-06, "loss": 0.9464, "step": 1008 }, { "epoch": 0.035639699941277504, "grad_norm": 2.0026907920837402, "learning_rate": 9.99917284036731e-06, "loss": 0.9512, "step": 1009 }, { "epoch": 0.03567502174498541, "grad_norm": 1.9370663166046143, "learning_rate": 9.999162403416417e-06, "loss": 0.9828, "step": 1010 }, { "epoch": 0.03571034354869331, "grad_norm": 1.855250597000122, "learning_rate": 9.999151901037427e-06, "loss": 0.9803, "step": 1011 }, { "epoch": 0.03574566535240122, "grad_norm": 1.9975029230117798, "learning_rate": 9.999141333230476e-06, "loss": 0.9741, "step": 1012 }, { "epoch": 0.035780987156109126, "grad_norm": 2.173319101333618, "learning_rate": 9.999130699995701e-06, "loss": 0.9801, "step": 1013 }, { "epoch": 0.035816308959817036, "grad_norm": 2.0255119800567627, "learning_rate": 9.999120001333245e-06, "loss": 0.9396, "step": 1014 }, { "epoch": 0.03585163076352494, "grad_norm": 1.8713034391403198, "learning_rate": 9.999109237243246e-06, "loss": 0.9112, "step": 1015 }, { "epoch": 0.035886952567232844, "grad_norm": 2.1672801971435547, "learning_rate": 9.999098407725845e-06, "loss": 0.961, "step": 1016 }, { "epoch": 0.035922274370940754, "grad_norm": 1.98032808303833, "learning_rate": 9.999087512781183e-06, "loss": 0.9483, "step": 1017 }, { "epoch": 0.03595759617464866, "grad_norm": 2.0908782482147217, "learning_rate": 9.999076552409404e-06, "loss": 0.9569, "step": 1018 }, { "epoch": 0.03599291797835656, "grad_norm": 2.0019876956939697, "learning_rate": 9.999065526610653e-06, "loss": 0.9472, "step": 1019 }, { "epoch": 0.03602823978206447, "grad_norm": 1.848923921585083, "learning_rate": 9.99905443538507e-06, "loss": 0.9215, "step": 1020 }, { "epoch": 0.036063561585772376, "grad_norm": 1.9383509159088135, "learning_rate": 9.999043278732801e-06, "loss": 0.9635, "step": 1021 }, { "epoch": 0.03609888338948029, "grad_norm": 2.0999741554260254, "learning_rate": 9.999032056653995e-06, "loss": 0.9732, "step": 1022 }, { "epoch": 0.03613420519318819, "grad_norm": 2.1977574825286865, "learning_rate": 9.999020769148796e-06, "loss": 0.9447, "step": 1023 }, { "epoch": 0.036169526996896094, "grad_norm": 2.0759975910186768, "learning_rate": 9.999009416217353e-06, "loss": 0.9727, "step": 1024 }, { "epoch": 0.036204848800604005, "grad_norm": 2.795996904373169, "learning_rate": 9.998997997859817e-06, "loss": 0.969, "step": 1025 }, { "epoch": 0.03624017060431191, "grad_norm": 2.126336097717285, "learning_rate": 9.998986514076332e-06, "loss": 0.968, "step": 1026 }, { "epoch": 0.03627549240801982, "grad_norm": 1.9998948574066162, "learning_rate": 9.998974964867052e-06, "loss": 1.0046, "step": 1027 }, { "epoch": 0.03631081421172772, "grad_norm": 1.964468240737915, "learning_rate": 9.998963350232127e-06, "loss": 0.9569, "step": 1028 }, { "epoch": 0.036346136015435626, "grad_norm": 2.1071362495422363, "learning_rate": 9.99895167017171e-06, "loss": 1.0019, "step": 1029 }, { "epoch": 0.03638145781914354, "grad_norm": 2.029313087463379, "learning_rate": 9.998939924685953e-06, "loss": 1.0283, "step": 1030 }, { "epoch": 0.03641677962285144, "grad_norm": 1.9386943578720093, "learning_rate": 9.99892811377501e-06, "loss": 0.9386, "step": 1031 }, { "epoch": 0.036452101426559344, "grad_norm": 2.0515074729919434, "learning_rate": 9.998916237439036e-06, "loss": 0.9566, "step": 1032 }, { "epoch": 0.036487423230267255, "grad_norm": 2.10598087310791, "learning_rate": 9.998904295678185e-06, "loss": 1.0073, "step": 1033 }, { "epoch": 0.03652274503397516, "grad_norm": 2.205939769744873, "learning_rate": 9.998892288492615e-06, "loss": 0.9574, "step": 1034 }, { "epoch": 0.03655806683768307, "grad_norm": 2.3069937229156494, "learning_rate": 9.998880215882483e-06, "loss": 0.9433, "step": 1035 }, { "epoch": 0.03659338864139097, "grad_norm": 1.889463186264038, "learning_rate": 9.998868077847945e-06, "loss": 0.9909, "step": 1036 }, { "epoch": 0.036628710445098876, "grad_norm": 2.0246875286102295, "learning_rate": 9.998855874389162e-06, "loss": 1.0058, "step": 1037 }, { "epoch": 0.03666403224880679, "grad_norm": 1.893221139907837, "learning_rate": 9.998843605506294e-06, "loss": 0.9789, "step": 1038 }, { "epoch": 0.03669935405251469, "grad_norm": 2.139484167098999, "learning_rate": 9.998831271199499e-06, "loss": 0.9524, "step": 1039 }, { "epoch": 0.0367346758562226, "grad_norm": 1.8393733501434326, "learning_rate": 9.99881887146894e-06, "loss": 0.9491, "step": 1040 }, { "epoch": 0.036769997659930505, "grad_norm": 2.0939204692840576, "learning_rate": 9.99880640631478e-06, "loss": 0.9411, "step": 1041 }, { "epoch": 0.03680531946363841, "grad_norm": 1.992256999015808, "learning_rate": 9.998793875737182e-06, "loss": 0.9773, "step": 1042 }, { "epoch": 0.03684064126734632, "grad_norm": 1.9647825956344604, "learning_rate": 9.998781279736308e-06, "loss": 0.9482, "step": 1043 }, { "epoch": 0.03687596307105422, "grad_norm": 1.958419680595398, "learning_rate": 9.998768618312324e-06, "loss": 0.9357, "step": 1044 }, { "epoch": 0.036911284874762126, "grad_norm": 1.8089869022369385, "learning_rate": 9.998755891465399e-06, "loss": 0.9375, "step": 1045 }, { "epoch": 0.03694660667847004, "grad_norm": 1.9141590595245361, "learning_rate": 9.998743099195694e-06, "loss": 0.9058, "step": 1046 }, { "epoch": 0.03698192848217794, "grad_norm": 2.007232189178467, "learning_rate": 9.99873024150338e-06, "loss": 0.966, "step": 1047 }, { "epoch": 0.03701725028588585, "grad_norm": 2.034656286239624, "learning_rate": 9.998717318388622e-06, "loss": 0.9918, "step": 1048 }, { "epoch": 0.037052572089593755, "grad_norm": 2.0793254375457764, "learning_rate": 9.998704329851595e-06, "loss": 0.9591, "step": 1049 }, { "epoch": 0.03708789389330166, "grad_norm": 2.007396697998047, "learning_rate": 9.998691275892463e-06, "loss": 0.9464, "step": 1050 }, { "epoch": 0.03712321569700957, "grad_norm": 1.8752555847167969, "learning_rate": 9.998678156511398e-06, "loss": 0.9189, "step": 1051 }, { "epoch": 0.03715853750071747, "grad_norm": 1.9067304134368896, "learning_rate": 9.998664971708577e-06, "loss": 0.9156, "step": 1052 }, { "epoch": 0.03719385930442538, "grad_norm": 1.9311792850494385, "learning_rate": 9.998651721484164e-06, "loss": 0.9534, "step": 1053 }, { "epoch": 0.03722918110813329, "grad_norm": 2.0009372234344482, "learning_rate": 9.99863840583834e-06, "loss": 0.9593, "step": 1054 }, { "epoch": 0.03726450291184119, "grad_norm": 2.2390732765197754, "learning_rate": 9.998625024771276e-06, "loss": 0.9386, "step": 1055 }, { "epoch": 0.0372998247155491, "grad_norm": 1.843193769454956, "learning_rate": 9.998611578283147e-06, "loss": 0.9805, "step": 1056 }, { "epoch": 0.037335146519257005, "grad_norm": 1.9402191638946533, "learning_rate": 9.998598066374128e-06, "loss": 0.9371, "step": 1057 }, { "epoch": 0.037370468322964916, "grad_norm": 2.049712657928467, "learning_rate": 9.998584489044398e-06, "loss": 0.9654, "step": 1058 }, { "epoch": 0.03740579012667282, "grad_norm": 1.9156588315963745, "learning_rate": 9.998570846294134e-06, "loss": 0.9501, "step": 1059 }, { "epoch": 0.03744111193038072, "grad_norm": 2.051478147506714, "learning_rate": 9.998557138123515e-06, "loss": 0.9736, "step": 1060 }, { "epoch": 0.037476433734088634, "grad_norm": 2.1512813568115234, "learning_rate": 9.998543364532718e-06, "loss": 0.9349, "step": 1061 }, { "epoch": 0.03751175553779654, "grad_norm": 2.050227642059326, "learning_rate": 9.998529525521928e-06, "loss": 0.9756, "step": 1062 }, { "epoch": 0.03754707734150444, "grad_norm": 2.201054573059082, "learning_rate": 9.99851562109132e-06, "loss": 0.9902, "step": 1063 }, { "epoch": 0.03758239914521235, "grad_norm": 1.9478145837783813, "learning_rate": 9.99850165124108e-06, "loss": 0.9621, "step": 1064 }, { "epoch": 0.037617720948920255, "grad_norm": 2.138092041015625, "learning_rate": 9.99848761597139e-06, "loss": 0.9135, "step": 1065 }, { "epoch": 0.037653042752628166, "grad_norm": 2.059462070465088, "learning_rate": 9.998473515282434e-06, "loss": 0.9479, "step": 1066 }, { "epoch": 0.03768836455633607, "grad_norm": 1.9272338151931763, "learning_rate": 9.998459349174398e-06, "loss": 0.9548, "step": 1067 }, { "epoch": 0.03772368636004397, "grad_norm": 1.936422348022461, "learning_rate": 9.998445117647464e-06, "loss": 0.9573, "step": 1068 }, { "epoch": 0.037759008163751884, "grad_norm": 2.195708990097046, "learning_rate": 9.99843082070182e-06, "loss": 0.9818, "step": 1069 }, { "epoch": 0.03779432996745979, "grad_norm": 2.168201446533203, "learning_rate": 9.998416458337652e-06, "loss": 0.9611, "step": 1070 }, { "epoch": 0.0378296517711677, "grad_norm": 1.885438084602356, "learning_rate": 9.998402030555151e-06, "loss": 0.9734, "step": 1071 }, { "epoch": 0.0378649735748756, "grad_norm": 2.173412322998047, "learning_rate": 9.998387537354502e-06, "loss": 0.9788, "step": 1072 }, { "epoch": 0.037900295378583505, "grad_norm": 2.4589250087738037, "learning_rate": 9.998372978735897e-06, "loss": 0.9209, "step": 1073 }, { "epoch": 0.037935617182291416, "grad_norm": 2.0654351711273193, "learning_rate": 9.998358354699527e-06, "loss": 0.9605, "step": 1074 }, { "epoch": 0.03797093898599932, "grad_norm": 2.0442006587982178, "learning_rate": 9.998343665245582e-06, "loss": 0.9299, "step": 1075 }, { "epoch": 0.03800626078970722, "grad_norm": 2.1098577976226807, "learning_rate": 9.998328910374256e-06, "loss": 0.9416, "step": 1076 }, { "epoch": 0.038041582593415134, "grad_norm": 1.9054255485534668, "learning_rate": 9.998314090085737e-06, "loss": 0.9969, "step": 1077 }, { "epoch": 0.03807690439712304, "grad_norm": 1.9640254974365234, "learning_rate": 9.998299204380227e-06, "loss": 0.9652, "step": 1078 }, { "epoch": 0.03811222620083095, "grad_norm": 2.0515642166137695, "learning_rate": 9.998284253257913e-06, "loss": 0.9698, "step": 1079 }, { "epoch": 0.03814754800453885, "grad_norm": 2.234767436981201, "learning_rate": 9.998269236718996e-06, "loss": 0.9716, "step": 1080 }, { "epoch": 0.038182869808246755, "grad_norm": 2.1410083770751953, "learning_rate": 9.998254154763671e-06, "loss": 0.9832, "step": 1081 }, { "epoch": 0.038218191611954666, "grad_norm": 1.9468070268630981, "learning_rate": 9.998239007392135e-06, "loss": 0.9733, "step": 1082 }, { "epoch": 0.03825351341566257, "grad_norm": 1.901720404624939, "learning_rate": 9.998223794604588e-06, "loss": 0.9428, "step": 1083 }, { "epoch": 0.03828883521937048, "grad_norm": 2.247246265411377, "learning_rate": 9.998208516401224e-06, "loss": 0.9495, "step": 1084 }, { "epoch": 0.038324157023078384, "grad_norm": 5.292239665985107, "learning_rate": 9.998193172782249e-06, "loss": 0.9553, "step": 1085 }, { "epoch": 0.03835947882678629, "grad_norm": 1.981730341911316, "learning_rate": 9.99817776374786e-06, "loss": 0.9562, "step": 1086 }, { "epoch": 0.0383948006304942, "grad_norm": 2.054373264312744, "learning_rate": 9.998162289298262e-06, "loss": 0.9591, "step": 1087 }, { "epoch": 0.0384301224342021, "grad_norm": 1.979702115058899, "learning_rate": 9.998146749433653e-06, "loss": 0.9552, "step": 1088 }, { "epoch": 0.038465444237910006, "grad_norm": 1.9509724378585815, "learning_rate": 9.998131144154242e-06, "loss": 0.9634, "step": 1089 }, { "epoch": 0.038500766041617916, "grad_norm": 1.820525884628296, "learning_rate": 9.998115473460227e-06, "loss": 0.9531, "step": 1090 }, { "epoch": 0.03853608784532582, "grad_norm": 1.9355241060256958, "learning_rate": 9.998099737351819e-06, "loss": 0.9401, "step": 1091 }, { "epoch": 0.03857140964903373, "grad_norm": 2.201286792755127, "learning_rate": 9.998083935829219e-06, "loss": 0.9865, "step": 1092 }, { "epoch": 0.038606731452741634, "grad_norm": 1.9841710329055786, "learning_rate": 9.998068068892636e-06, "loss": 0.9618, "step": 1093 }, { "epoch": 0.03864205325644954, "grad_norm": 7.23026704788208, "learning_rate": 9.998052136542277e-06, "loss": 0.9286, "step": 1094 }, { "epoch": 0.03867737506015745, "grad_norm": 2.263178586959839, "learning_rate": 9.998036138778352e-06, "loss": 1.0442, "step": 1095 }, { "epoch": 0.03871269686386535, "grad_norm": 2.022505044937134, "learning_rate": 9.998020075601071e-06, "loss": 0.9719, "step": 1096 }, { "epoch": 0.03874801866757326, "grad_norm": 1.9358878135681152, "learning_rate": 9.998003947010643e-06, "loss": 0.952, "step": 1097 }, { "epoch": 0.038783340471281166, "grad_norm": 2.0053951740264893, "learning_rate": 9.997987753007277e-06, "loss": 0.9726, "step": 1098 }, { "epoch": 0.03881866227498907, "grad_norm": 2.004270076751709, "learning_rate": 9.997971493591188e-06, "loss": 0.9707, "step": 1099 }, { "epoch": 0.03885398407869698, "grad_norm": 2.042980194091797, "learning_rate": 9.997955168762587e-06, "loss": 1.0161, "step": 1100 }, { "epoch": 0.038889305882404884, "grad_norm": 1.8404245376586914, "learning_rate": 9.997938778521687e-06, "loss": 0.9598, "step": 1101 }, { "epoch": 0.03892462768611279, "grad_norm": 1.7390135526657104, "learning_rate": 9.997922322868707e-06, "loss": 0.9255, "step": 1102 }, { "epoch": 0.0389599494898207, "grad_norm": 2.2612574100494385, "learning_rate": 9.997905801803856e-06, "loss": 0.9963, "step": 1103 }, { "epoch": 0.0389952712935286, "grad_norm": 1.8849347829818726, "learning_rate": 9.997889215327354e-06, "loss": 0.9527, "step": 1104 }, { "epoch": 0.03903059309723651, "grad_norm": 1.8888497352600098, "learning_rate": 9.99787256343942e-06, "loss": 0.956, "step": 1105 }, { "epoch": 0.039065914900944416, "grad_norm": 1.9845610857009888, "learning_rate": 9.997855846140266e-06, "loss": 0.9542, "step": 1106 }, { "epoch": 0.03910123670465232, "grad_norm": 2.153146505355835, "learning_rate": 9.997839063430118e-06, "loss": 0.9796, "step": 1107 }, { "epoch": 0.03913655850836023, "grad_norm": 1.984729290008545, "learning_rate": 9.997822215309188e-06, "loss": 0.9443, "step": 1108 }, { "epoch": 0.039171880312068134, "grad_norm": 1.9184839725494385, "learning_rate": 9.997805301777701e-06, "loss": 0.6186, "step": 1109 }, { "epoch": 0.039207202115776045, "grad_norm": 2.5164682865142822, "learning_rate": 9.99778832283588e-06, "loss": 0.997, "step": 1110 }, { "epoch": 0.03924252391948395, "grad_norm": 2.248701572418213, "learning_rate": 9.997771278483943e-06, "loss": 0.9692, "step": 1111 }, { "epoch": 0.03927784572319185, "grad_norm": 1.881630778312683, "learning_rate": 9.997754168722114e-06, "loss": 0.9565, "step": 1112 }, { "epoch": 0.03931316752689976, "grad_norm": 2.048135280609131, "learning_rate": 9.99773699355062e-06, "loss": 0.9122, "step": 1113 }, { "epoch": 0.03934848933060767, "grad_norm": 1.9670121669769287, "learning_rate": 9.997719752969684e-06, "loss": 0.9669, "step": 1114 }, { "epoch": 0.03938381113431558, "grad_norm": 2.083972930908203, "learning_rate": 9.99770244697953e-06, "loss": 0.9706, "step": 1115 }, { "epoch": 0.03941913293802348, "grad_norm": 2.1613683700561523, "learning_rate": 9.997685075580385e-06, "loss": 0.9895, "step": 1116 }, { "epoch": 0.039454454741731385, "grad_norm": 2.1943881511688232, "learning_rate": 9.99766763877248e-06, "loss": 0.9064, "step": 1117 }, { "epoch": 0.039489776545439295, "grad_norm": 2.3176779747009277, "learning_rate": 9.997650136556039e-06, "loss": 0.9717, "step": 1118 }, { "epoch": 0.0395250983491472, "grad_norm": 1.893933892250061, "learning_rate": 9.99763256893129e-06, "loss": 0.9506, "step": 1119 }, { "epoch": 0.0395604201528551, "grad_norm": 1.9548527002334595, "learning_rate": 9.99761493589847e-06, "loss": 0.9672, "step": 1120 }, { "epoch": 0.03959574195656301, "grad_norm": 2.1335461139678955, "learning_rate": 9.997597237457802e-06, "loss": 0.9707, "step": 1121 }, { "epoch": 0.03963106376027092, "grad_norm": 2.0317397117614746, "learning_rate": 9.997579473609521e-06, "loss": 0.9561, "step": 1122 }, { "epoch": 0.03966638556397883, "grad_norm": 1.9321569204330444, "learning_rate": 9.997561644353859e-06, "loss": 0.9459, "step": 1123 }, { "epoch": 0.03970170736768673, "grad_norm": 2.007422685623169, "learning_rate": 9.997543749691052e-06, "loss": 0.9685, "step": 1124 }, { "epoch": 0.039737029171394635, "grad_norm": 2.130258560180664, "learning_rate": 9.997525789621329e-06, "loss": 0.9238, "step": 1125 }, { "epoch": 0.039772350975102545, "grad_norm": 1.856889009475708, "learning_rate": 9.99750776414493e-06, "loss": 0.9271, "step": 1126 }, { "epoch": 0.03980767277881045, "grad_norm": 2.0012269020080566, "learning_rate": 9.997489673262089e-06, "loss": 0.979, "step": 1127 }, { "epoch": 0.03984299458251836, "grad_norm": 1.8671901226043701, "learning_rate": 9.99747151697304e-06, "loss": 0.9763, "step": 1128 }, { "epoch": 0.03987831638622626, "grad_norm": 2.009514093399048, "learning_rate": 9.997453295278025e-06, "loss": 0.9786, "step": 1129 }, { "epoch": 0.03991363818993417, "grad_norm": 2.4594953060150146, "learning_rate": 9.99743500817728e-06, "loss": 0.9708, "step": 1130 }, { "epoch": 0.03994895999364208, "grad_norm": 2.5254738330841064, "learning_rate": 9.997416655671046e-06, "loss": 0.9542, "step": 1131 }, { "epoch": 0.03998428179734998, "grad_norm": 1.9345513582229614, "learning_rate": 9.997398237759563e-06, "loss": 0.9582, "step": 1132 }, { "epoch": 0.040019603601057885, "grad_norm": 1.9976871013641357, "learning_rate": 9.997379754443069e-06, "loss": 0.9857, "step": 1133 }, { "epoch": 0.040054925404765795, "grad_norm": 1.8804184198379517, "learning_rate": 9.99736120572181e-06, "loss": 0.9361, "step": 1134 }, { "epoch": 0.0400902472084737, "grad_norm": 2.147118330001831, "learning_rate": 9.997342591596027e-06, "loss": 0.9643, "step": 1135 }, { "epoch": 0.04012556901218161, "grad_norm": 2.042856454849243, "learning_rate": 9.997323912065961e-06, "loss": 0.9563, "step": 1136 }, { "epoch": 0.04016089081588951, "grad_norm": 1.851253867149353, "learning_rate": 9.99730516713186e-06, "loss": 0.9425, "step": 1137 }, { "epoch": 0.04019621261959742, "grad_norm": 1.9587974548339844, "learning_rate": 9.997286356793972e-06, "loss": 0.9543, "step": 1138 }, { "epoch": 0.04023153442330533, "grad_norm": 2.0672338008880615, "learning_rate": 9.997267481052535e-06, "loss": 0.9816, "step": 1139 }, { "epoch": 0.04026685622701323, "grad_norm": 2.0790746212005615, "learning_rate": 9.997248539907802e-06, "loss": 0.9959, "step": 1140 }, { "epoch": 0.04030217803072114, "grad_norm": 2.1268651485443115, "learning_rate": 9.99722953336002e-06, "loss": 0.954, "step": 1141 }, { "epoch": 0.040337499834429046, "grad_norm": 2.0833542346954346, "learning_rate": 9.997210461409438e-06, "loss": 0.9843, "step": 1142 }, { "epoch": 0.04037282163813695, "grad_norm": 1.9714692831039429, "learning_rate": 9.997191324056303e-06, "loss": 0.9623, "step": 1143 }, { "epoch": 0.04040814344184486, "grad_norm": 2.186363458633423, "learning_rate": 9.99717212130087e-06, "loss": 0.9454, "step": 1144 }, { "epoch": 0.040443465245552764, "grad_norm": 2.1012704372406006, "learning_rate": 9.997152853143385e-06, "loss": 0.9518, "step": 1145 }, { "epoch": 0.04047878704926067, "grad_norm": 2.3212506771087646, "learning_rate": 9.997133519584104e-06, "loss": 0.9994, "step": 1146 }, { "epoch": 0.04051410885296858, "grad_norm": 2.0038959980010986, "learning_rate": 9.997114120623278e-06, "loss": 0.9466, "step": 1147 }, { "epoch": 0.04054943065667648, "grad_norm": 2.1448144912719727, "learning_rate": 9.997094656261161e-06, "loss": 0.9654, "step": 1148 }, { "epoch": 0.04058475246038439, "grad_norm": 2.1108736991882324, "learning_rate": 9.99707512649801e-06, "loss": 0.9875, "step": 1149 }, { "epoch": 0.040620074264092296, "grad_norm": 2.1737191677093506, "learning_rate": 9.99705553133408e-06, "loss": 0.9591, "step": 1150 }, { "epoch": 0.0406553960678002, "grad_norm": 1.8946588039398193, "learning_rate": 9.997035870769624e-06, "loss": 0.9217, "step": 1151 }, { "epoch": 0.04069071787150811, "grad_norm": 2.0572452545166016, "learning_rate": 9.997016144804905e-06, "loss": 0.9232, "step": 1152 }, { "epoch": 0.040726039675216014, "grad_norm": 1.8821771144866943, "learning_rate": 9.996996353440176e-06, "loss": 0.98, "step": 1153 }, { "epoch": 0.040761361478923924, "grad_norm": 2.0291686058044434, "learning_rate": 9.996976496675699e-06, "loss": 0.9593, "step": 1154 }, { "epoch": 0.04079668328263183, "grad_norm": 2.5329957008361816, "learning_rate": 9.996956574511732e-06, "loss": 0.9274, "step": 1155 }, { "epoch": 0.04083200508633973, "grad_norm": 1.9632477760314941, "learning_rate": 9.996936586948536e-06, "loss": 0.9611, "step": 1156 }, { "epoch": 0.04086732689004764, "grad_norm": 1.806433916091919, "learning_rate": 9.996916533986375e-06, "loss": 0.9456, "step": 1157 }, { "epoch": 0.040902648693755546, "grad_norm": 2.124499559402466, "learning_rate": 9.99689641562551e-06, "loss": 0.9697, "step": 1158 }, { "epoch": 0.04093797049746345, "grad_norm": 1.8778067827224731, "learning_rate": 9.996876231866201e-06, "loss": 0.9475, "step": 1159 }, { "epoch": 0.04097329230117136, "grad_norm": 2.514343023300171, "learning_rate": 9.996855982708717e-06, "loss": 0.9867, "step": 1160 }, { "epoch": 0.041008614104879264, "grad_norm": 2.0373940467834473, "learning_rate": 9.996835668153322e-06, "loss": 0.9713, "step": 1161 }, { "epoch": 0.041043935908587174, "grad_norm": 1.9788460731506348, "learning_rate": 9.996815288200282e-06, "loss": 0.9816, "step": 1162 }, { "epoch": 0.04107925771229508, "grad_norm": 1.9457303285598755, "learning_rate": 9.996794842849859e-06, "loss": 0.9549, "step": 1163 }, { "epoch": 0.04111457951600298, "grad_norm": 2.0209617614746094, "learning_rate": 9.996774332102328e-06, "loss": 0.9555, "step": 1164 }, { "epoch": 0.04114990131971089, "grad_norm": 2.1062674522399902, "learning_rate": 9.996753755957953e-06, "loss": 0.9901, "step": 1165 }, { "epoch": 0.041185223123418796, "grad_norm": 1.9517114162445068, "learning_rate": 9.996733114417003e-06, "loss": 0.9815, "step": 1166 }, { "epoch": 0.04122054492712671, "grad_norm": 2.032081127166748, "learning_rate": 9.99671240747975e-06, "loss": 0.9361, "step": 1167 }, { "epoch": 0.04125586673083461, "grad_norm": 2.000352621078491, "learning_rate": 9.996691635146464e-06, "loss": 0.9744, "step": 1168 }, { "epoch": 0.041291188534542514, "grad_norm": 1.831242561340332, "learning_rate": 9.996670797417416e-06, "loss": 0.9277, "step": 1169 }, { "epoch": 0.041326510338250425, "grad_norm": 1.86365807056427, "learning_rate": 9.996649894292882e-06, "loss": 0.9775, "step": 1170 }, { "epoch": 0.04136183214195833, "grad_norm": 1.972866415977478, "learning_rate": 9.996628925773133e-06, "loss": 0.9598, "step": 1171 }, { "epoch": 0.04139715394566624, "grad_norm": 1.9009422063827515, "learning_rate": 9.996607891858443e-06, "loss": 0.9146, "step": 1172 }, { "epoch": 0.04143247574937414, "grad_norm": 1.8298485279083252, "learning_rate": 9.99658679254909e-06, "loss": 0.9865, "step": 1173 }, { "epoch": 0.041467797553082046, "grad_norm": 1.8132191896438599, "learning_rate": 9.996565627845347e-06, "loss": 0.9634, "step": 1174 }, { "epoch": 0.04150311935678996, "grad_norm": 2.4209182262420654, "learning_rate": 9.996544397747492e-06, "loss": 0.9895, "step": 1175 }, { "epoch": 0.04153844116049786, "grad_norm": 1.9866341352462769, "learning_rate": 9.996523102255802e-06, "loss": 0.9462, "step": 1176 }, { "epoch": 0.041573762964205764, "grad_norm": 2.0675902366638184, "learning_rate": 9.996501741370558e-06, "loss": 0.9303, "step": 1177 }, { "epoch": 0.041609084767913675, "grad_norm": 1.974376916885376, "learning_rate": 9.99648031509204e-06, "loss": 0.9224, "step": 1178 }, { "epoch": 0.04164440657162158, "grad_norm": 1.8977831602096558, "learning_rate": 9.996458823420527e-06, "loss": 0.9621, "step": 1179 }, { "epoch": 0.04167972837532949, "grad_norm": 1.864255666732788, "learning_rate": 9.996437266356298e-06, "loss": 0.96, "step": 1180 }, { "epoch": 0.04171505017903739, "grad_norm": 1.9402222633361816, "learning_rate": 9.996415643899638e-06, "loss": 0.9657, "step": 1181 }, { "epoch": 0.041750371982745296, "grad_norm": 2.0427844524383545, "learning_rate": 9.99639395605083e-06, "loss": 0.9548, "step": 1182 }, { "epoch": 0.04178569378645321, "grad_norm": 2.5174715518951416, "learning_rate": 9.996372202810157e-06, "loss": 0.9437, "step": 1183 }, { "epoch": 0.04182101559016111, "grad_norm": 2.1821134090423584, "learning_rate": 9.996350384177902e-06, "loss": 0.9143, "step": 1184 }, { "epoch": 0.04185633739386902, "grad_norm": 1.9405847787857056, "learning_rate": 9.996328500154354e-06, "loss": 0.9621, "step": 1185 }, { "epoch": 0.041891659197576925, "grad_norm": 2.3808865547180176, "learning_rate": 9.996306550739798e-06, "loss": 0.926, "step": 1186 }, { "epoch": 0.04192698100128483, "grad_norm": 2.1484739780426025, "learning_rate": 9.996284535934521e-06, "loss": 0.9689, "step": 1187 }, { "epoch": 0.04196230280499274, "grad_norm": 1.9875379800796509, "learning_rate": 9.99626245573881e-06, "loss": 0.9126, "step": 1188 }, { "epoch": 0.04199762460870064, "grad_norm": 1.736223816871643, "learning_rate": 9.996240310152956e-06, "loss": 0.9338, "step": 1189 }, { "epoch": 0.042032946412408546, "grad_norm": 2.175405263900757, "learning_rate": 9.996218099177248e-06, "loss": 0.9286, "step": 1190 }, { "epoch": 0.04206826821611646, "grad_norm": 2.528433084487915, "learning_rate": 9.996195822811976e-06, "loss": 0.9601, "step": 1191 }, { "epoch": 0.04210359001982436, "grad_norm": 2.054441213607788, "learning_rate": 9.996173481057435e-06, "loss": 0.9732, "step": 1192 }, { "epoch": 0.04213891182353227, "grad_norm": 1.930834174156189, "learning_rate": 9.996151073913911e-06, "loss": 0.9128, "step": 1193 }, { "epoch": 0.042174233627240175, "grad_norm": 1.9559705257415771, "learning_rate": 9.996128601381702e-06, "loss": 0.9538, "step": 1194 }, { "epoch": 0.04220955543094808, "grad_norm": 2.264240264892578, "learning_rate": 9.996106063461103e-06, "loss": 0.9622, "step": 1195 }, { "epoch": 0.04224487723465599, "grad_norm": 2.081967830657959, "learning_rate": 9.996083460152406e-06, "loss": 0.9566, "step": 1196 }, { "epoch": 0.04228019903836389, "grad_norm": 2.062252998352051, "learning_rate": 9.996060791455907e-06, "loss": 0.9564, "step": 1197 }, { "epoch": 0.042315520842071803, "grad_norm": 2.041191577911377, "learning_rate": 9.996038057371904e-06, "loss": 0.989, "step": 1198 }, { "epoch": 0.04235084264577971, "grad_norm": 1.9988722801208496, "learning_rate": 9.996015257900694e-06, "loss": 0.9679, "step": 1199 }, { "epoch": 0.04238616444948761, "grad_norm": 2.0183751583099365, "learning_rate": 9.995992393042575e-06, "loss": 0.9763, "step": 1200 }, { "epoch": 0.04242148625319552, "grad_norm": 1.969663143157959, "learning_rate": 9.995969462797848e-06, "loss": 0.971, "step": 1201 }, { "epoch": 0.042456808056903425, "grad_norm": 2.057284355163574, "learning_rate": 9.99594646716681e-06, "loss": 0.921, "step": 1202 }, { "epoch": 0.04249212986061133, "grad_norm": 1.8472381830215454, "learning_rate": 9.995923406149765e-06, "loss": 0.9689, "step": 1203 }, { "epoch": 0.04252745166431924, "grad_norm": 1.871332049369812, "learning_rate": 9.995900279747012e-06, "loss": 0.94, "step": 1204 }, { "epoch": 0.04256277346802714, "grad_norm": 1.7625524997711182, "learning_rate": 9.995877087958858e-06, "loss": 0.9613, "step": 1205 }, { "epoch": 0.042598095271735054, "grad_norm": 1.7457969188690186, "learning_rate": 9.995853830785602e-06, "loss": 0.9694, "step": 1206 }, { "epoch": 0.04263341707544296, "grad_norm": 2.0000386238098145, "learning_rate": 9.995830508227551e-06, "loss": 0.9885, "step": 1207 }, { "epoch": 0.04266873887915086, "grad_norm": 1.7296743392944336, "learning_rate": 9.995807120285008e-06, "loss": 0.9391, "step": 1208 }, { "epoch": 0.04270406068285877, "grad_norm": 2.218945026397705, "learning_rate": 9.995783666958283e-06, "loss": 0.9874, "step": 1209 }, { "epoch": 0.042739382486566675, "grad_norm": 1.9006357192993164, "learning_rate": 9.995760148247678e-06, "loss": 0.9826, "step": 1210 }, { "epoch": 0.042774704290274586, "grad_norm": 1.8037736415863037, "learning_rate": 9.995736564153504e-06, "loss": 0.948, "step": 1211 }, { "epoch": 0.04281002609398249, "grad_norm": 1.8482069969177246, "learning_rate": 9.99571291467607e-06, "loss": 0.9193, "step": 1212 }, { "epoch": 0.04284534789769039, "grad_norm": 1.9864569902420044, "learning_rate": 9.995689199815684e-06, "loss": 0.9423, "step": 1213 }, { "epoch": 0.042880669701398304, "grad_norm": 1.9906591176986694, "learning_rate": 9.995665419572655e-06, "loss": 0.9719, "step": 1214 }, { "epoch": 0.04291599150510621, "grad_norm": 2.0313162803649902, "learning_rate": 9.9956415739473e-06, "loss": 0.9769, "step": 1215 }, { "epoch": 0.04295131330881412, "grad_norm": 2.2990641593933105, "learning_rate": 9.995617662939922e-06, "loss": 0.95, "step": 1216 }, { "epoch": 0.04298663511252202, "grad_norm": 2.269400119781494, "learning_rate": 9.995593686550843e-06, "loss": 0.9653, "step": 1217 }, { "epoch": 0.043021956916229925, "grad_norm": 2.1841177940368652, "learning_rate": 9.995569644780372e-06, "loss": 0.962, "step": 1218 }, { "epoch": 0.043057278719937836, "grad_norm": 2.1346168518066406, "learning_rate": 9.995545537628824e-06, "loss": 0.9744, "step": 1219 }, { "epoch": 0.04309260052364574, "grad_norm": 2.1023483276367188, "learning_rate": 9.995521365096515e-06, "loss": 0.9336, "step": 1220 }, { "epoch": 0.04312792232735364, "grad_norm": 1.9686099290847778, "learning_rate": 9.995497127183762e-06, "loss": 0.919, "step": 1221 }, { "epoch": 0.043163244131061554, "grad_norm": 2.4687304496765137, "learning_rate": 9.995472823890882e-06, "loss": 0.937, "step": 1222 }, { "epoch": 0.04319856593476946, "grad_norm": 1.9131674766540527, "learning_rate": 9.995448455218192e-06, "loss": 0.9582, "step": 1223 }, { "epoch": 0.04323388773847737, "grad_norm": 1.9617701768875122, "learning_rate": 9.995424021166012e-06, "loss": 0.9444, "step": 1224 }, { "epoch": 0.04326920954218527, "grad_norm": 1.937009572982788, "learning_rate": 9.995399521734662e-06, "loss": 0.9441, "step": 1225 }, { "epoch": 0.043304531345893175, "grad_norm": 2.120244026184082, "learning_rate": 9.995374956924459e-06, "loss": 0.9307, "step": 1226 }, { "epoch": 0.043339853149601086, "grad_norm": 1.8018485307693481, "learning_rate": 9.995350326735732e-06, "loss": 0.9382, "step": 1227 }, { "epoch": 0.04337517495330899, "grad_norm": 2.1095237731933594, "learning_rate": 9.995325631168797e-06, "loss": 0.9416, "step": 1228 }, { "epoch": 0.0434104967570169, "grad_norm": 1.871681571006775, "learning_rate": 9.995300870223978e-06, "loss": 0.9379, "step": 1229 }, { "epoch": 0.043445818560724804, "grad_norm": 1.846325159072876, "learning_rate": 9.995276043901602e-06, "loss": 0.9447, "step": 1230 }, { "epoch": 0.04348114036443271, "grad_norm": 1.9390480518341064, "learning_rate": 9.995251152201992e-06, "loss": 0.9596, "step": 1231 }, { "epoch": 0.04351646216814062, "grad_norm": 1.8437764644622803, "learning_rate": 9.995226195125473e-06, "loss": 0.9802, "step": 1232 }, { "epoch": 0.04355178397184852, "grad_norm": 1.850196123123169, "learning_rate": 9.995201172672372e-06, "loss": 0.9572, "step": 1233 }, { "epoch": 0.043587105775556426, "grad_norm": 2.0952930450439453, "learning_rate": 9.995176084843018e-06, "loss": 0.9326, "step": 1234 }, { "epoch": 0.043622427579264336, "grad_norm": 1.7496298551559448, "learning_rate": 9.995150931637737e-06, "loss": 0.9522, "step": 1235 }, { "epoch": 0.04365774938297224, "grad_norm": 1.9959800243377686, "learning_rate": 9.995125713056862e-06, "loss": 0.9151, "step": 1236 }, { "epoch": 0.04369307118668015, "grad_norm": 1.8172049522399902, "learning_rate": 9.99510042910072e-06, "loss": 0.912, "step": 1237 }, { "epoch": 0.043728392990388054, "grad_norm": 1.8787615299224854, "learning_rate": 9.995075079769643e-06, "loss": 0.9473, "step": 1238 }, { "epoch": 0.04376371479409596, "grad_norm": 1.8273354768753052, "learning_rate": 9.99504966506396e-06, "loss": 0.9665, "step": 1239 }, { "epoch": 0.04379903659780387, "grad_norm": 2.0938384532928467, "learning_rate": 9.995024184984007e-06, "loss": 0.9557, "step": 1240 }, { "epoch": 0.04383435840151177, "grad_norm": 2.030421733856201, "learning_rate": 9.994998639530117e-06, "loss": 0.6059, "step": 1241 }, { "epoch": 0.04386968020521968, "grad_norm": 1.9290131330490112, "learning_rate": 9.994973028702624e-06, "loss": 0.9562, "step": 1242 }, { "epoch": 0.043905002008927586, "grad_norm": 2.0965404510498047, "learning_rate": 9.994947352501863e-06, "loss": 0.9433, "step": 1243 }, { "epoch": 0.04394032381263549, "grad_norm": 1.9408639669418335, "learning_rate": 9.99492161092817e-06, "loss": 0.919, "step": 1244 }, { "epoch": 0.0439756456163434, "grad_norm": 2.18496036529541, "learning_rate": 9.99489580398188e-06, "loss": 0.9271, "step": 1245 }, { "epoch": 0.044010967420051304, "grad_norm": 2.543729543685913, "learning_rate": 9.994869931663336e-06, "loss": 0.9819, "step": 1246 }, { "epoch": 0.04404628922375921, "grad_norm": 2.112464666366577, "learning_rate": 9.994843993972871e-06, "loss": 0.9437, "step": 1247 }, { "epoch": 0.04408161102746712, "grad_norm": 1.9101225137710571, "learning_rate": 9.994817990910827e-06, "loss": 0.9338, "step": 1248 }, { "epoch": 0.04411693283117502, "grad_norm": 1.9418607950210571, "learning_rate": 9.994791922477545e-06, "loss": 0.9596, "step": 1249 }, { "epoch": 0.04415225463488293, "grad_norm": 2.0888278484344482, "learning_rate": 9.994765788673364e-06, "loss": 0.9127, "step": 1250 }, { "epoch": 0.044187576438590837, "grad_norm": 2.1789205074310303, "learning_rate": 9.994739589498627e-06, "loss": 0.9902, "step": 1251 }, { "epoch": 0.04422289824229874, "grad_norm": 2.1393184661865234, "learning_rate": 9.994713324953678e-06, "loss": 0.9488, "step": 1252 }, { "epoch": 0.04425822004600665, "grad_norm": 2.24507212638855, "learning_rate": 9.99468699503886e-06, "loss": 0.9556, "step": 1253 }, { "epoch": 0.044293541849714554, "grad_norm": 2.049006462097168, "learning_rate": 9.994660599754518e-06, "loss": 0.9409, "step": 1254 }, { "epoch": 0.044328863653422465, "grad_norm": 2.1297738552093506, "learning_rate": 9.994634139100996e-06, "loss": 0.9108, "step": 1255 }, { "epoch": 0.04436418545713037, "grad_norm": 1.9419584274291992, "learning_rate": 9.99460761307864e-06, "loss": 0.9082, "step": 1256 }, { "epoch": 0.04439950726083827, "grad_norm": 2.095737934112549, "learning_rate": 9.994581021687801e-06, "loss": 0.956, "step": 1257 }, { "epoch": 0.04443482906454618, "grad_norm": 1.7463310956954956, "learning_rate": 9.994554364928824e-06, "loss": 0.5806, "step": 1258 }, { "epoch": 0.04447015086825409, "grad_norm": 2.1956000328063965, "learning_rate": 9.994527642802056e-06, "loss": 0.9206, "step": 1259 }, { "epoch": 0.04450547267196199, "grad_norm": 2.0487709045410156, "learning_rate": 9.994500855307852e-06, "loss": 0.9828, "step": 1260 }, { "epoch": 0.0445407944756699, "grad_norm": 1.8520570993423462, "learning_rate": 9.994474002446558e-06, "loss": 0.9374, "step": 1261 }, { "epoch": 0.044576116279377805, "grad_norm": 1.8604283332824707, "learning_rate": 9.994447084218528e-06, "loss": 0.9377, "step": 1262 }, { "epoch": 0.044611438083085715, "grad_norm": 1.831296682357788, "learning_rate": 9.994420100624114e-06, "loss": 0.9203, "step": 1263 }, { "epoch": 0.04464675988679362, "grad_norm": 1.8925188779830933, "learning_rate": 9.994393051663666e-06, "loss": 0.9126, "step": 1264 }, { "epoch": 0.04468208169050152, "grad_norm": 1.7747169733047485, "learning_rate": 9.994365937337542e-06, "loss": 1.0051, "step": 1265 }, { "epoch": 0.04471740349420943, "grad_norm": 1.2111245393753052, "learning_rate": 9.994338757646096e-06, "loss": 0.5802, "step": 1266 }, { "epoch": 0.04475272529791734, "grad_norm": 2.2131009101867676, "learning_rate": 9.994311512589683e-06, "loss": 0.9545, "step": 1267 }, { "epoch": 0.04478804710162525, "grad_norm": 2.0083134174346924, "learning_rate": 9.99428420216866e-06, "loss": 0.9364, "step": 1268 }, { "epoch": 0.04482336890533315, "grad_norm": 2.073540449142456, "learning_rate": 9.994256826383383e-06, "loss": 0.9062, "step": 1269 }, { "epoch": 0.044858690709041055, "grad_norm": 2.298872232437134, "learning_rate": 9.994229385234212e-06, "loss": 0.9638, "step": 1270 }, { "epoch": 0.044894012512748965, "grad_norm": 1.9848231077194214, "learning_rate": 9.994201878721506e-06, "loss": 0.9358, "step": 1271 }, { "epoch": 0.04492933431645687, "grad_norm": 2.2795374393463135, "learning_rate": 9.994174306845626e-06, "loss": 0.9486, "step": 1272 }, { "epoch": 0.04496465612016478, "grad_norm": 2.1481475830078125, "learning_rate": 9.994146669606928e-06, "loss": 0.9814, "step": 1273 }, { "epoch": 0.04499997792387268, "grad_norm": 1.9942282438278198, "learning_rate": 9.99411896700578e-06, "loss": 0.9696, "step": 1274 }, { "epoch": 0.04503529972758059, "grad_norm": 1.4331543445587158, "learning_rate": 9.994091199042541e-06, "loss": 0.6046, "step": 1275 }, { "epoch": 0.0450706215312885, "grad_norm": 2.6513020992279053, "learning_rate": 9.994063365717575e-06, "loss": 1.005, "step": 1276 }, { "epoch": 0.0451059433349964, "grad_norm": 2.326761484146118, "learning_rate": 9.994035467031247e-06, "loss": 0.9394, "step": 1277 }, { "epoch": 0.045141265138704305, "grad_norm": 2.1335091590881348, "learning_rate": 9.99400750298392e-06, "loss": 0.9634, "step": 1278 }, { "epoch": 0.045176586942412215, "grad_norm": 2.116115093231201, "learning_rate": 9.993979473575964e-06, "loss": 1.0001, "step": 1279 }, { "epoch": 0.04521190874612012, "grad_norm": 2.4673736095428467, "learning_rate": 9.99395137880774e-06, "loss": 1.0038, "step": 1280 }, { "epoch": 0.04524723054982803, "grad_norm": 2.108851194381714, "learning_rate": 9.993923218679621e-06, "loss": 0.976, "step": 1281 }, { "epoch": 0.04528255235353593, "grad_norm": 2.0081911087036133, "learning_rate": 9.993894993191975e-06, "loss": 0.9249, "step": 1282 }, { "epoch": 0.04531787415724384, "grad_norm": 2.320120096206665, "learning_rate": 9.99386670234517e-06, "loss": 0.9419, "step": 1283 }, { "epoch": 0.04535319596095175, "grad_norm": 2.261070966720581, "learning_rate": 9.993838346139575e-06, "loss": 0.9402, "step": 1284 }, { "epoch": 0.04538851776465965, "grad_norm": 1.8914399147033691, "learning_rate": 9.993809924575561e-06, "loss": 0.9269, "step": 1285 }, { "epoch": 0.04542383956836756, "grad_norm": 1.765934705734253, "learning_rate": 9.993781437653505e-06, "loss": 0.9563, "step": 1286 }, { "epoch": 0.045459161372075466, "grad_norm": 2.1146256923675537, "learning_rate": 9.993752885373774e-06, "loss": 0.9177, "step": 1287 }, { "epoch": 0.04549448317578337, "grad_norm": 1.97575044631958, "learning_rate": 9.993724267736745e-06, "loss": 0.9739, "step": 1288 }, { "epoch": 0.04552980497949128, "grad_norm": 1.7740975618362427, "learning_rate": 9.99369558474279e-06, "loss": 0.8979, "step": 1289 }, { "epoch": 0.045565126783199184, "grad_norm": 2.3371071815490723, "learning_rate": 9.993666836392286e-06, "loss": 0.9806, "step": 1290 }, { "epoch": 0.04560044858690709, "grad_norm": 1.8964444398880005, "learning_rate": 9.993638022685612e-06, "loss": 0.9393, "step": 1291 }, { "epoch": 0.045635770390615, "grad_norm": 1.9372648000717163, "learning_rate": 9.99360914362314e-06, "loss": 0.987, "step": 1292 }, { "epoch": 0.0456710921943229, "grad_norm": 1.972298264503479, "learning_rate": 9.99358019920525e-06, "loss": 0.945, "step": 1293 }, { "epoch": 0.04570641399803081, "grad_norm": 1.9649572372436523, "learning_rate": 9.993551189432322e-06, "loss": 0.972, "step": 1294 }, { "epoch": 0.045741735801738716, "grad_norm": 1.936632752418518, "learning_rate": 9.993522114304733e-06, "loss": 0.9187, "step": 1295 }, { "epoch": 0.04577705760544662, "grad_norm": 1.855046033859253, "learning_rate": 9.993492973822866e-06, "loss": 0.9805, "step": 1296 }, { "epoch": 0.04581237940915453, "grad_norm": 1.7817928791046143, "learning_rate": 9.993463767987103e-06, "loss": 0.9433, "step": 1297 }, { "epoch": 0.045847701212862434, "grad_norm": 1.9373003244400024, "learning_rate": 9.993434496797823e-06, "loss": 0.9354, "step": 1298 }, { "epoch": 0.045883023016570344, "grad_norm": 2.224309206008911, "learning_rate": 9.993405160255413e-06, "loss": 0.978, "step": 1299 }, { "epoch": 0.04591834482027825, "grad_norm": 2.21175217628479, "learning_rate": 9.993375758360253e-06, "loss": 0.9518, "step": 1300 }, { "epoch": 0.04595366662398615, "grad_norm": 1.8475143909454346, "learning_rate": 9.99334629111273e-06, "loss": 0.9761, "step": 1301 }, { "epoch": 0.04598898842769406, "grad_norm": 2.075853109359741, "learning_rate": 9.993316758513232e-06, "loss": 0.9312, "step": 1302 }, { "epoch": 0.046024310231401966, "grad_norm": 1.998919129371643, "learning_rate": 9.99328716056214e-06, "loss": 0.9258, "step": 1303 }, { "epoch": 0.04605963203510987, "grad_norm": 2.1298627853393555, "learning_rate": 9.993257497259845e-06, "loss": 0.9856, "step": 1304 }, { "epoch": 0.04609495383881778, "grad_norm": 1.9078365564346313, "learning_rate": 9.993227768606734e-06, "loss": 0.9505, "step": 1305 }, { "epoch": 0.046130275642525684, "grad_norm": 2.2202069759368896, "learning_rate": 9.993197974603198e-06, "loss": 0.9798, "step": 1306 }, { "epoch": 0.046165597446233594, "grad_norm": 3.191760540008545, "learning_rate": 9.993168115249626e-06, "loss": 0.9639, "step": 1307 }, { "epoch": 0.0462009192499415, "grad_norm": 1.8600152730941772, "learning_rate": 9.993138190546406e-06, "loss": 0.9188, "step": 1308 }, { "epoch": 0.0462362410536494, "grad_norm": 1.8401154279708862, "learning_rate": 9.993108200493933e-06, "loss": 0.9547, "step": 1309 }, { "epoch": 0.04627156285735731, "grad_norm": 2.04325532913208, "learning_rate": 9.9930781450926e-06, "loss": 0.9339, "step": 1310 }, { "epoch": 0.046306884661065216, "grad_norm": 2.223557710647583, "learning_rate": 9.993048024342796e-06, "loss": 0.9526, "step": 1311 }, { "epoch": 0.04634220646477313, "grad_norm": 1.9541573524475098, "learning_rate": 9.99301783824492e-06, "loss": 0.9708, "step": 1312 }, { "epoch": 0.04637752826848103, "grad_norm": 1.8550843000411987, "learning_rate": 9.992987586799364e-06, "loss": 0.9323, "step": 1313 }, { "epoch": 0.046412850072188934, "grad_norm": 1.8490517139434814, "learning_rate": 9.992957270006524e-06, "loss": 0.9634, "step": 1314 }, { "epoch": 0.046448171875896845, "grad_norm": 1.77997887134552, "learning_rate": 9.992926887866801e-06, "loss": 0.9421, "step": 1315 }, { "epoch": 0.04648349367960475, "grad_norm": 1.9424736499786377, "learning_rate": 9.992896440380589e-06, "loss": 0.9629, "step": 1316 }, { "epoch": 0.04651881548331266, "grad_norm": 1.9466224908828735, "learning_rate": 9.992865927548286e-06, "loss": 0.9247, "step": 1317 }, { "epoch": 0.04655413728702056, "grad_norm": 1.801544189453125, "learning_rate": 9.992835349370292e-06, "loss": 0.8983, "step": 1318 }, { "epoch": 0.046589459090728466, "grad_norm": 1.8328288793563843, "learning_rate": 9.992804705847005e-06, "loss": 0.9615, "step": 1319 }, { "epoch": 0.04662478089443638, "grad_norm": 1.7795921564102173, "learning_rate": 9.992773996978833e-06, "loss": 0.9481, "step": 1320 }, { "epoch": 0.04666010269814428, "grad_norm": 1.8347363471984863, "learning_rate": 9.99274322276617e-06, "loss": 0.9602, "step": 1321 }, { "epoch": 0.046695424501852184, "grad_norm": 1.9685972929000854, "learning_rate": 9.992712383209424e-06, "loss": 0.9387, "step": 1322 }, { "epoch": 0.046730746305560095, "grad_norm": 1.9515074491500854, "learning_rate": 9.992681478308997e-06, "loss": 0.9487, "step": 1323 }, { "epoch": 0.046766068109268, "grad_norm": 1.9370218515396118, "learning_rate": 9.992650508065293e-06, "loss": 0.9686, "step": 1324 }, { "epoch": 0.04680138991297591, "grad_norm": 1.5671745538711548, "learning_rate": 9.992619472478719e-06, "loss": 0.5901, "step": 1325 }, { "epoch": 0.04683671171668381, "grad_norm": 2.0427539348602295, "learning_rate": 9.992588371549678e-06, "loss": 0.9142, "step": 1326 }, { "epoch": 0.046872033520391716, "grad_norm": 2.101088762283325, "learning_rate": 9.99255720527858e-06, "loss": 0.9318, "step": 1327 }, { "epoch": 0.04690735532409963, "grad_norm": 1.9240540266036987, "learning_rate": 9.992525973665831e-06, "loss": 0.9374, "step": 1328 }, { "epoch": 0.04694267712780753, "grad_norm": 1.9641392230987549, "learning_rate": 9.99249467671184e-06, "loss": 0.9438, "step": 1329 }, { "epoch": 0.04697799893151544, "grad_norm": 1.8714189529418945, "learning_rate": 9.992463314417017e-06, "loss": 0.9378, "step": 1330 }, { "epoch": 0.047013320735223345, "grad_norm": 1.949670672416687, "learning_rate": 9.992431886781775e-06, "loss": 0.9169, "step": 1331 }, { "epoch": 0.04704864253893125, "grad_norm": 2.3626480102539062, "learning_rate": 9.992400393806521e-06, "loss": 0.9424, "step": 1332 }, { "epoch": 0.04708396434263916, "grad_norm": 1.9136769771575928, "learning_rate": 9.99236883549167e-06, "loss": 0.9222, "step": 1333 }, { "epoch": 0.04711928614634706, "grad_norm": 2.0633955001831055, "learning_rate": 9.992337211837633e-06, "loss": 0.9628, "step": 1334 }, { "epoch": 0.047154607950054966, "grad_norm": 2.2494149208068848, "learning_rate": 9.992305522844827e-06, "loss": 0.9724, "step": 1335 }, { "epoch": 0.04718992975376288, "grad_norm": 2.1279397010803223, "learning_rate": 9.992273768513663e-06, "loss": 0.938, "step": 1336 }, { "epoch": 0.04722525155747078, "grad_norm": 1.770060658454895, "learning_rate": 9.992241948844559e-06, "loss": 0.8899, "step": 1337 }, { "epoch": 0.04726057336117869, "grad_norm": 2.316953182220459, "learning_rate": 9.99221006383793e-06, "loss": 0.9514, "step": 1338 }, { "epoch": 0.047295895164886595, "grad_norm": 2.1426546573638916, "learning_rate": 9.992178113494196e-06, "loss": 0.9372, "step": 1339 }, { "epoch": 0.0473312169685945, "grad_norm": 2.0271825790405273, "learning_rate": 9.992146097813772e-06, "loss": 0.9351, "step": 1340 }, { "epoch": 0.04736653877230241, "grad_norm": 2.0337042808532715, "learning_rate": 9.992114016797079e-06, "loss": 0.9369, "step": 1341 }, { "epoch": 0.04740186057601031, "grad_norm": 2.2174251079559326, "learning_rate": 9.992081870444535e-06, "loss": 0.9484, "step": 1342 }, { "epoch": 0.047437182379718223, "grad_norm": 2.452831506729126, "learning_rate": 9.992049658756564e-06, "loss": 0.9899, "step": 1343 }, { "epoch": 0.04747250418342613, "grad_norm": 1.9005287885665894, "learning_rate": 9.992017381733584e-06, "loss": 0.9402, "step": 1344 }, { "epoch": 0.04750782598713403, "grad_norm": 1.9364138841629028, "learning_rate": 9.991985039376018e-06, "loss": 0.9404, "step": 1345 }, { "epoch": 0.04754314779084194, "grad_norm": 2.1701605319976807, "learning_rate": 9.99195263168429e-06, "loss": 0.9508, "step": 1346 }, { "epoch": 0.047578469594549845, "grad_norm": 2.2523386478424072, "learning_rate": 9.991920158658825e-06, "loss": 0.941, "step": 1347 }, { "epoch": 0.04761379139825775, "grad_norm": 2.046339988708496, "learning_rate": 9.99188762030005e-06, "loss": 0.9266, "step": 1348 }, { "epoch": 0.04764911320196566, "grad_norm": 2.021944761276245, "learning_rate": 9.991855016608385e-06, "loss": 0.992, "step": 1349 }, { "epoch": 0.04768443500567356, "grad_norm": 1.9447890520095825, "learning_rate": 9.991822347584261e-06, "loss": 0.9371, "step": 1350 }, { "epoch": 0.047719756809381474, "grad_norm": 1.9205877780914307, "learning_rate": 9.991789613228104e-06, "loss": 0.9601, "step": 1351 }, { "epoch": 0.04775507861308938, "grad_norm": 1.8400086164474487, "learning_rate": 9.991756813540342e-06, "loss": 0.9156, "step": 1352 }, { "epoch": 0.04779040041679728, "grad_norm": 1.9292292594909668, "learning_rate": 9.991723948521409e-06, "loss": 0.9526, "step": 1353 }, { "epoch": 0.04782572222050519, "grad_norm": 1.94857919216156, "learning_rate": 9.991691018171728e-06, "loss": 0.9558, "step": 1354 }, { "epoch": 0.047861044024213095, "grad_norm": 1.7791920900344849, "learning_rate": 9.991658022491733e-06, "loss": 0.9538, "step": 1355 }, { "epoch": 0.047896365827921006, "grad_norm": 1.8302721977233887, "learning_rate": 9.991624961481857e-06, "loss": 0.9356, "step": 1356 }, { "epoch": 0.04793168763162891, "grad_norm": 1.909471035003662, "learning_rate": 9.991591835142532e-06, "loss": 0.9602, "step": 1357 }, { "epoch": 0.04796700943533681, "grad_norm": 1.7053333520889282, "learning_rate": 9.991558643474192e-06, "loss": 0.9337, "step": 1358 }, { "epoch": 0.048002331239044724, "grad_norm": 1.8501185178756714, "learning_rate": 9.99152538647727e-06, "loss": 0.9647, "step": 1359 }, { "epoch": 0.04803765304275263, "grad_norm": 1.8526253700256348, "learning_rate": 9.991492064152202e-06, "loss": 0.9748, "step": 1360 }, { "epoch": 0.04807297484646053, "grad_norm": 1.9172112941741943, "learning_rate": 9.991458676499424e-06, "loss": 0.9412, "step": 1361 }, { "epoch": 0.04810829665016844, "grad_norm": 1.9222385883331299, "learning_rate": 9.991425223519373e-06, "loss": 0.9396, "step": 1362 }, { "epoch": 0.048143618453876345, "grad_norm": 1.8733237981796265, "learning_rate": 9.991391705212487e-06, "loss": 0.949, "step": 1363 }, { "epoch": 0.048178940257584256, "grad_norm": 1.9013831615447998, "learning_rate": 9.991358121579207e-06, "loss": 0.946, "step": 1364 }, { "epoch": 0.04821426206129216, "grad_norm": 2.04947829246521, "learning_rate": 9.991324472619967e-06, "loss": 0.9614, "step": 1365 }, { "epoch": 0.04824958386500006, "grad_norm": 1.6417194604873657, "learning_rate": 9.991290758335213e-06, "loss": 0.9053, "step": 1366 }, { "epoch": 0.048284905668707974, "grad_norm": 1.7637044191360474, "learning_rate": 9.991256978725381e-06, "loss": 0.939, "step": 1367 }, { "epoch": 0.04832022747241588, "grad_norm": 1.8300031423568726, "learning_rate": 9.991223133790919e-06, "loss": 0.943, "step": 1368 }, { "epoch": 0.04835554927612379, "grad_norm": 2.149958848953247, "learning_rate": 9.991189223532264e-06, "loss": 0.9408, "step": 1369 }, { "epoch": 0.04839087107983169, "grad_norm": 1.9975923299789429, "learning_rate": 9.991155247949865e-06, "loss": 0.9528, "step": 1370 }, { "epoch": 0.048426192883539596, "grad_norm": 2.064772605895996, "learning_rate": 9.991121207044164e-06, "loss": 0.9977, "step": 1371 }, { "epoch": 0.048461514687247506, "grad_norm": 1.8519703149795532, "learning_rate": 9.991087100815606e-06, "loss": 0.9063, "step": 1372 }, { "epoch": 0.04849683649095541, "grad_norm": 2.2731828689575195, "learning_rate": 9.991052929264637e-06, "loss": 0.9373, "step": 1373 }, { "epoch": 0.04853215829466332, "grad_norm": 1.930518388748169, "learning_rate": 9.991018692391708e-06, "loss": 0.9605, "step": 1374 }, { "epoch": 0.048567480098371224, "grad_norm": 2.2737178802490234, "learning_rate": 9.990984390197263e-06, "loss": 0.9307, "step": 1375 }, { "epoch": 0.04860280190207913, "grad_norm": 1.8147069215774536, "learning_rate": 9.990950022681753e-06, "loss": 0.9082, "step": 1376 }, { "epoch": 0.04863812370578704, "grad_norm": 1.8436894416809082, "learning_rate": 9.990915589845629e-06, "loss": 0.9185, "step": 1377 }, { "epoch": 0.04867344550949494, "grad_norm": 1.877102255821228, "learning_rate": 9.990881091689337e-06, "loss": 0.9188, "step": 1378 }, { "epoch": 0.048708767313202846, "grad_norm": 2.0760433673858643, "learning_rate": 9.990846528213334e-06, "loss": 0.9534, "step": 1379 }, { "epoch": 0.048744089116910756, "grad_norm": 1.9403722286224365, "learning_rate": 9.990811899418069e-06, "loss": 0.9647, "step": 1380 }, { "epoch": 0.04877941092061866, "grad_norm": 2.1110446453094482, "learning_rate": 9.990777205303995e-06, "loss": 0.9279, "step": 1381 }, { "epoch": 0.04881473272432657, "grad_norm": 1.9725829362869263, "learning_rate": 9.990742445871568e-06, "loss": 0.9507, "step": 1382 }, { "epoch": 0.048850054528034474, "grad_norm": 1.8396713733673096, "learning_rate": 9.99070762112124e-06, "loss": 0.9557, "step": 1383 }, { "epoch": 0.04888537633174238, "grad_norm": 2.0419936180114746, "learning_rate": 9.990672731053472e-06, "loss": 0.9453, "step": 1384 }, { "epoch": 0.04892069813545029, "grad_norm": 1.845369815826416, "learning_rate": 9.990637775668716e-06, "loss": 0.6002, "step": 1385 }, { "epoch": 0.04895601993915819, "grad_norm": 2.3187005519866943, "learning_rate": 9.990602754967431e-06, "loss": 0.9575, "step": 1386 }, { "epoch": 0.0489913417428661, "grad_norm": 2.1372692584991455, "learning_rate": 9.990567668950076e-06, "loss": 0.9292, "step": 1387 }, { "epoch": 0.049026663546574006, "grad_norm": 1.8549104928970337, "learning_rate": 9.990532517617107e-06, "loss": 0.9168, "step": 1388 }, { "epoch": 0.04906198535028191, "grad_norm": 2.037196397781372, "learning_rate": 9.990497300968989e-06, "loss": 0.9505, "step": 1389 }, { "epoch": 0.04909730715398982, "grad_norm": 2.010603904724121, "learning_rate": 9.990462019006179e-06, "loss": 0.9624, "step": 1390 }, { "epoch": 0.049132628957697724, "grad_norm": 1.9539061784744263, "learning_rate": 9.990426671729141e-06, "loss": 0.9464, "step": 1391 }, { "epoch": 0.04916795076140563, "grad_norm": 1.6791536808013916, "learning_rate": 9.990391259138336e-06, "loss": 0.9252, "step": 1392 }, { "epoch": 0.04920327256511354, "grad_norm": 2.005841016769409, "learning_rate": 9.99035578123423e-06, "loss": 0.9397, "step": 1393 }, { "epoch": 0.04923859436882144, "grad_norm": 1.8544154167175293, "learning_rate": 9.990320238017284e-06, "loss": 0.9682, "step": 1394 }, { "epoch": 0.04927391617252935, "grad_norm": 1.8797534704208374, "learning_rate": 9.990284629487965e-06, "loss": 0.9312, "step": 1395 }, { "epoch": 0.049309237976237257, "grad_norm": 1.9614102840423584, "learning_rate": 9.99024895564674e-06, "loss": 0.9706, "step": 1396 }, { "epoch": 0.04934455977994516, "grad_norm": 2.1404988765716553, "learning_rate": 9.990213216494073e-06, "loss": 0.9576, "step": 1397 }, { "epoch": 0.04937988158365307, "grad_norm": 1.9772402048110962, "learning_rate": 9.990177412030436e-06, "loss": 0.9269, "step": 1398 }, { "epoch": 0.049415203387360974, "grad_norm": 1.727837085723877, "learning_rate": 9.990141542256293e-06, "loss": 0.9554, "step": 1399 }, { "epoch": 0.049450525191068885, "grad_norm": 2.0100159645080566, "learning_rate": 9.990105607172115e-06, "loss": 0.9573, "step": 1400 }, { "epoch": 0.04948584699477679, "grad_norm": 2.0386650562286377, "learning_rate": 9.990069606778374e-06, "loss": 0.9325, "step": 1401 }, { "epoch": 0.04952116879848469, "grad_norm": 2.277865409851074, "learning_rate": 9.99003354107554e-06, "loss": 0.9718, "step": 1402 }, { "epoch": 0.0495564906021926, "grad_norm": 2.0984890460968018, "learning_rate": 9.989997410064085e-06, "loss": 0.9315, "step": 1403 }, { "epoch": 0.04959181240590051, "grad_norm": 2.106388807296753, "learning_rate": 9.989961213744484e-06, "loss": 0.9483, "step": 1404 }, { "epoch": 0.04962713420960841, "grad_norm": 1.8278789520263672, "learning_rate": 9.989924952117205e-06, "loss": 0.9406, "step": 1405 }, { "epoch": 0.04966245601331632, "grad_norm": 1.9396518468856812, "learning_rate": 9.989888625182729e-06, "loss": 0.9194, "step": 1406 }, { "epoch": 0.049697777817024225, "grad_norm": 2.34519362449646, "learning_rate": 9.989852232941526e-06, "loss": 0.97, "step": 1407 }, { "epoch": 0.049733099620732135, "grad_norm": 1.9661586284637451, "learning_rate": 9.989815775394077e-06, "loss": 0.9114, "step": 1408 }, { "epoch": 0.04976842142444004, "grad_norm": 1.7108367681503296, "learning_rate": 9.989779252540857e-06, "loss": 0.9518, "step": 1409 }, { "epoch": 0.04980374322814794, "grad_norm": 2.194854497909546, "learning_rate": 9.989742664382342e-06, "loss": 0.9543, "step": 1410 }, { "epoch": 0.04983906503185585, "grad_norm": 1.7076576948165894, "learning_rate": 9.989706010919015e-06, "loss": 0.5823, "step": 1411 }, { "epoch": 0.04987438683556376, "grad_norm": 2.291936159133911, "learning_rate": 9.989669292151354e-06, "loss": 0.938, "step": 1412 }, { "epoch": 0.04990970863927167, "grad_norm": 2.302525281906128, "learning_rate": 9.989632508079838e-06, "loss": 0.9249, "step": 1413 }, { "epoch": 0.04994503044297957, "grad_norm": 2.208949089050293, "learning_rate": 9.989595658704951e-06, "loss": 0.9249, "step": 1414 }, { "epoch": 0.049980352246687475, "grad_norm": 1.955027461051941, "learning_rate": 9.989558744027173e-06, "loss": 0.9523, "step": 1415 }, { "epoch": 0.050015674050395385, "grad_norm": 2.7193710803985596, "learning_rate": 9.989521764046987e-06, "loss": 0.9367, "step": 1416 }, { "epoch": 0.05005099585410329, "grad_norm": 2.403618574142456, "learning_rate": 9.98948471876488e-06, "loss": 0.9838, "step": 1417 }, { "epoch": 0.05008631765781119, "grad_norm": 1.8101451396942139, "learning_rate": 9.989447608181335e-06, "loss": 0.9024, "step": 1418 }, { "epoch": 0.0501216394615191, "grad_norm": 1.8443597555160522, "learning_rate": 9.989410432296836e-06, "loss": 0.9328, "step": 1419 }, { "epoch": 0.05015696126522701, "grad_norm": 1.7720955610275269, "learning_rate": 9.989373191111872e-06, "loss": 0.9418, "step": 1420 }, { "epoch": 0.05019228306893492, "grad_norm": 1.8516817092895508, "learning_rate": 9.989335884626931e-06, "loss": 0.9781, "step": 1421 }, { "epoch": 0.05022760487264282, "grad_norm": 1.862284541130066, "learning_rate": 9.989298512842499e-06, "loss": 0.9306, "step": 1422 }, { "epoch": 0.050262926676350725, "grad_norm": 1.8261064291000366, "learning_rate": 9.989261075759065e-06, "loss": 0.9175, "step": 1423 }, { "epoch": 0.050298248480058635, "grad_norm": 1.8410594463348389, "learning_rate": 9.989223573377121e-06, "loss": 0.9589, "step": 1424 }, { "epoch": 0.05033357028376654, "grad_norm": 1.9137128591537476, "learning_rate": 9.989186005697155e-06, "loss": 0.9131, "step": 1425 }, { "epoch": 0.05036889208747445, "grad_norm": 1.8786612749099731, "learning_rate": 9.98914837271966e-06, "loss": 0.9332, "step": 1426 }, { "epoch": 0.05040421389118235, "grad_norm": 1.8883507251739502, "learning_rate": 9.989110674445132e-06, "loss": 0.9545, "step": 1427 }, { "epoch": 0.05043953569489026, "grad_norm": 1.9210379123687744, "learning_rate": 9.98907291087406e-06, "loss": 0.9775, "step": 1428 }, { "epoch": 0.05047485749859817, "grad_norm": 1.7395764589309692, "learning_rate": 9.98903508200694e-06, "loss": 0.9159, "step": 1429 }, { "epoch": 0.05051017930230607, "grad_norm": 1.9496508836746216, "learning_rate": 9.988997187844264e-06, "loss": 0.9311, "step": 1430 }, { "epoch": 0.05054550110601398, "grad_norm": 1.933045506477356, "learning_rate": 9.988959228386534e-06, "loss": 0.9307, "step": 1431 }, { "epoch": 0.050580822909721886, "grad_norm": 1.9437036514282227, "learning_rate": 9.988921203634241e-06, "loss": 0.9331, "step": 1432 }, { "epoch": 0.05061614471342979, "grad_norm": 1.7125663757324219, "learning_rate": 9.988883113587888e-06, "loss": 0.9338, "step": 1433 }, { "epoch": 0.0506514665171377, "grad_norm": 2.466925621032715, "learning_rate": 9.988844958247967e-06, "loss": 0.9002, "step": 1434 }, { "epoch": 0.050686788320845604, "grad_norm": 1.9520174264907837, "learning_rate": 9.988806737614984e-06, "loss": 0.9383, "step": 1435 }, { "epoch": 0.05072211012455351, "grad_norm": 1.8605235815048218, "learning_rate": 9.988768451689434e-06, "loss": 0.94, "step": 1436 }, { "epoch": 0.05075743192826142, "grad_norm": 1.8600754737854004, "learning_rate": 9.988730100471821e-06, "loss": 0.9351, "step": 1437 }, { "epoch": 0.05079275373196932, "grad_norm": 2.188246011734009, "learning_rate": 9.988691683962647e-06, "loss": 0.9619, "step": 1438 }, { "epoch": 0.05082807553567723, "grad_norm": 1.928718090057373, "learning_rate": 9.988653202162415e-06, "loss": 0.9307, "step": 1439 }, { "epoch": 0.050863397339385136, "grad_norm": 1.6696585416793823, "learning_rate": 9.988614655071625e-06, "loss": 0.9165, "step": 1440 }, { "epoch": 0.05089871914309304, "grad_norm": 1.8863202333450317, "learning_rate": 9.988576042690786e-06, "loss": 0.9327, "step": 1441 }, { "epoch": 0.05093404094680095, "grad_norm": 1.9789608716964722, "learning_rate": 9.988537365020401e-06, "loss": 0.9457, "step": 1442 }, { "epoch": 0.050969362750508854, "grad_norm": 3.2360565662384033, "learning_rate": 9.988498622060977e-06, "loss": 0.9445, "step": 1443 }, { "epoch": 0.051004684554216764, "grad_norm": 1.7235321998596191, "learning_rate": 9.98845981381302e-06, "loss": 0.9233, "step": 1444 }, { "epoch": 0.05104000635792467, "grad_norm": 2.1956000328063965, "learning_rate": 9.98842094027704e-06, "loss": 0.9568, "step": 1445 }, { "epoch": 0.05107532816163257, "grad_norm": 2.113419771194458, "learning_rate": 9.988382001453542e-06, "loss": 0.946, "step": 1446 }, { "epoch": 0.05111064996534048, "grad_norm": 2.0075926780700684, "learning_rate": 9.98834299734304e-06, "loss": 0.9507, "step": 1447 }, { "epoch": 0.051145971769048386, "grad_norm": 1.8825290203094482, "learning_rate": 9.988303927946043e-06, "loss": 0.9312, "step": 1448 }, { "epoch": 0.05118129357275629, "grad_norm": 1.9440701007843018, "learning_rate": 9.98826479326306e-06, "loss": 0.9559, "step": 1449 }, { "epoch": 0.0512166153764642, "grad_norm": 1.8485826253890991, "learning_rate": 9.988225593294608e-06, "loss": 0.9275, "step": 1450 }, { "epoch": 0.051251937180172104, "grad_norm": 1.9049395322799683, "learning_rate": 9.988186328041194e-06, "loss": 0.9463, "step": 1451 }, { "epoch": 0.051287258983880014, "grad_norm": 1.9498275518417358, "learning_rate": 9.988146997503337e-06, "loss": 0.883, "step": 1452 }, { "epoch": 0.05132258078758792, "grad_norm": 1.733211636543274, "learning_rate": 9.988107601681547e-06, "loss": 0.9369, "step": 1453 }, { "epoch": 0.05135790259129582, "grad_norm": 1.9287883043289185, "learning_rate": 9.988068140576345e-06, "loss": 0.9206, "step": 1454 }, { "epoch": 0.05139322439500373, "grad_norm": 1.7354238033294678, "learning_rate": 9.988028614188245e-06, "loss": 0.9264, "step": 1455 }, { "epoch": 0.051428546198711636, "grad_norm": 2.783083915710449, "learning_rate": 9.987989022517763e-06, "loss": 0.8986, "step": 1456 }, { "epoch": 0.05146386800241955, "grad_norm": 2.143465518951416, "learning_rate": 9.987949365565419e-06, "loss": 0.9419, "step": 1457 }, { "epoch": 0.05149918980612745, "grad_norm": 1.7963027954101562, "learning_rate": 9.98790964333173e-06, "loss": 0.9163, "step": 1458 }, { "epoch": 0.051534511609835354, "grad_norm": 1.844444990158081, "learning_rate": 9.98786985581722e-06, "loss": 0.9555, "step": 1459 }, { "epoch": 0.051569833413543265, "grad_norm": 2.090181350708008, "learning_rate": 9.987830003022403e-06, "loss": 0.9422, "step": 1460 }, { "epoch": 0.05160515521725117, "grad_norm": 2.2453830242156982, "learning_rate": 9.987790084947808e-06, "loss": 0.9516, "step": 1461 }, { "epoch": 0.05164047702095907, "grad_norm": 1.8332818746566772, "learning_rate": 9.987750101593953e-06, "loss": 0.9176, "step": 1462 }, { "epoch": 0.05167579882466698, "grad_norm": 1.9629251956939697, "learning_rate": 9.987710052961362e-06, "loss": 0.9246, "step": 1463 }, { "epoch": 0.051711120628374886, "grad_norm": 1.7200292348861694, "learning_rate": 9.987669939050559e-06, "loss": 0.9668, "step": 1464 }, { "epoch": 0.0517464424320828, "grad_norm": 1.9494282007217407, "learning_rate": 9.98762975986207e-06, "loss": 0.9194, "step": 1465 }, { "epoch": 0.0517817642357907, "grad_norm": 2.0245227813720703, "learning_rate": 9.98758951539642e-06, "loss": 0.9038, "step": 1466 }, { "epoch": 0.051817086039498604, "grad_norm": 1.8039264678955078, "learning_rate": 9.987549205654136e-06, "loss": 0.8771, "step": 1467 }, { "epoch": 0.051852407843206515, "grad_norm": 1.8062127828598022, "learning_rate": 9.987508830635745e-06, "loss": 0.9138, "step": 1468 }, { "epoch": 0.05188772964691442, "grad_norm": 1.9226810932159424, "learning_rate": 9.987468390341777e-06, "loss": 0.9209, "step": 1469 }, { "epoch": 0.05192305145062233, "grad_norm": 2.1212568283081055, "learning_rate": 9.98742788477276e-06, "loss": 0.9428, "step": 1470 }, { "epoch": 0.05195837325433023, "grad_norm": 1.7927708625793457, "learning_rate": 9.987387313929224e-06, "loss": 0.921, "step": 1471 }, { "epoch": 0.051993695058038136, "grad_norm": 1.874182105064392, "learning_rate": 9.9873466778117e-06, "loss": 0.9329, "step": 1472 }, { "epoch": 0.05202901686174605, "grad_norm": 1.9541239738464355, "learning_rate": 9.987305976420722e-06, "loss": 0.9651, "step": 1473 }, { "epoch": 0.05206433866545395, "grad_norm": 1.738925576210022, "learning_rate": 9.98726520975682e-06, "loss": 0.9105, "step": 1474 }, { "epoch": 0.05209966046916186, "grad_norm": 2.0485246181488037, "learning_rate": 9.987224377820527e-06, "loss": 0.9595, "step": 1475 }, { "epoch": 0.052134982272869765, "grad_norm": 1.9825952053070068, "learning_rate": 9.98718348061238e-06, "loss": 0.9661, "step": 1476 }, { "epoch": 0.05217030407657767, "grad_norm": 1.920179009437561, "learning_rate": 9.987142518132913e-06, "loss": 0.9309, "step": 1477 }, { "epoch": 0.05220562588028558, "grad_norm": 1.8969676494598389, "learning_rate": 9.987101490382663e-06, "loss": 0.9345, "step": 1478 }, { "epoch": 0.05224094768399348, "grad_norm": 1.9412614107131958, "learning_rate": 9.987060397362165e-06, "loss": 0.9396, "step": 1479 }, { "epoch": 0.052276269487701386, "grad_norm": 2.0627262592315674, "learning_rate": 9.987019239071957e-06, "loss": 0.9036, "step": 1480 }, { "epoch": 0.0523115912914093, "grad_norm": 1.8705744743347168, "learning_rate": 9.986978015512581e-06, "loss": 0.9425, "step": 1481 }, { "epoch": 0.0523469130951172, "grad_norm": 1.7942906618118286, "learning_rate": 9.986936726684573e-06, "loss": 0.9043, "step": 1482 }, { "epoch": 0.05238223489882511, "grad_norm": 1.8419278860092163, "learning_rate": 9.986895372588474e-06, "loss": 0.9345, "step": 1483 }, { "epoch": 0.052417556702533015, "grad_norm": 1.7562118768692017, "learning_rate": 9.986853953224828e-06, "loss": 0.9296, "step": 1484 }, { "epoch": 0.05245287850624092, "grad_norm": 1.892598032951355, "learning_rate": 9.986812468594173e-06, "loss": 0.9412, "step": 1485 }, { "epoch": 0.05248820030994883, "grad_norm": 2.1380176544189453, "learning_rate": 9.986770918697054e-06, "loss": 0.9475, "step": 1486 }, { "epoch": 0.05252352211365673, "grad_norm": 1.838673710823059, "learning_rate": 9.986729303534016e-06, "loss": 0.9343, "step": 1487 }, { "epoch": 0.052558843917364644, "grad_norm": 2.2224366664886475, "learning_rate": 9.9866876231056e-06, "loss": 0.9507, "step": 1488 }, { "epoch": 0.05259416572107255, "grad_norm": 1.8815407752990723, "learning_rate": 9.986645877412355e-06, "loss": 0.9244, "step": 1489 }, { "epoch": 0.05262948752478045, "grad_norm": 2.208841562271118, "learning_rate": 9.986604066454827e-06, "loss": 0.9586, "step": 1490 }, { "epoch": 0.05266480932848836, "grad_norm": 2.0120229721069336, "learning_rate": 9.98656219023356e-06, "loss": 0.8985, "step": 1491 }, { "epoch": 0.052700131132196265, "grad_norm": 2.422150135040283, "learning_rate": 9.986520248749107e-06, "loss": 0.9129, "step": 1492 }, { "epoch": 0.05273545293590417, "grad_norm": 1.3153430223464966, "learning_rate": 9.986478242002014e-06, "loss": 0.5679, "step": 1493 }, { "epoch": 0.05277077473961208, "grad_norm": 2.010446071624756, "learning_rate": 9.98643616999283e-06, "loss": 0.9569, "step": 1494 }, { "epoch": 0.05280609654331998, "grad_norm": 2.2395663261413574, "learning_rate": 9.986394032722106e-06, "loss": 0.9934, "step": 1495 }, { "epoch": 0.052841418347027894, "grad_norm": 2.1272263526916504, "learning_rate": 9.986351830190396e-06, "loss": 0.9485, "step": 1496 }, { "epoch": 0.0528767401507358, "grad_norm": 1.8760689496994019, "learning_rate": 9.98630956239825e-06, "loss": 0.9552, "step": 1497 }, { "epoch": 0.0529120619544437, "grad_norm": 2.0493602752685547, "learning_rate": 9.986267229346221e-06, "loss": 0.9118, "step": 1498 }, { "epoch": 0.05294738375815161, "grad_norm": 2.084792137145996, "learning_rate": 9.986224831034865e-06, "loss": 0.9385, "step": 1499 }, { "epoch": 0.052982705561859515, "grad_norm": 2.1173629760742188, "learning_rate": 9.986182367464736e-06, "loss": 0.9032, "step": 1500 }, { "epoch": 0.053018027365567426, "grad_norm": 1.7981181144714355, "learning_rate": 9.986139838636388e-06, "loss": 0.9263, "step": 1501 }, { "epoch": 0.05305334916927533, "grad_norm": 1.9798178672790527, "learning_rate": 9.98609724455038e-06, "loss": 0.9455, "step": 1502 }, { "epoch": 0.05308867097298323, "grad_norm": 1.9901210069656372, "learning_rate": 9.986054585207267e-06, "loss": 0.9807, "step": 1503 }, { "epoch": 0.053123992776691144, "grad_norm": 1.9966899156570435, "learning_rate": 9.986011860607611e-06, "loss": 0.9018, "step": 1504 }, { "epoch": 0.05315931458039905, "grad_norm": 2.0259809494018555, "learning_rate": 9.985969070751968e-06, "loss": 0.9429, "step": 1505 }, { "epoch": 0.05319463638410695, "grad_norm": 2.0793309211730957, "learning_rate": 9.9859262156409e-06, "loss": 0.9489, "step": 1506 }, { "epoch": 0.05322995818781486, "grad_norm": 1.789377212524414, "learning_rate": 9.985883295274966e-06, "loss": 0.925, "step": 1507 }, { "epoch": 0.053265279991522765, "grad_norm": 2.2345802783966064, "learning_rate": 9.985840309654728e-06, "loss": 0.9457, "step": 1508 }, { "epoch": 0.053300601795230676, "grad_norm": 1.8961344957351685, "learning_rate": 9.985797258780751e-06, "loss": 0.9421, "step": 1509 }, { "epoch": 0.05333592359893858, "grad_norm": 1.9467270374298096, "learning_rate": 9.985754142653597e-06, "loss": 0.9525, "step": 1510 }, { "epoch": 0.05337124540264648, "grad_norm": 1.9218577146530151, "learning_rate": 9.985710961273828e-06, "loss": 0.9404, "step": 1511 }, { "epoch": 0.053406567206354394, "grad_norm": 2.045768976211548, "learning_rate": 9.985667714642012e-06, "loss": 0.8999, "step": 1512 }, { "epoch": 0.0534418890100623, "grad_norm": 1.8726471662521362, "learning_rate": 9.985624402758714e-06, "loss": 0.9682, "step": 1513 }, { "epoch": 0.05347721081377021, "grad_norm": 1.841057538986206, "learning_rate": 9.985581025624503e-06, "loss": 0.8977, "step": 1514 }, { "epoch": 0.05351253261747811, "grad_norm": 1.8654727935791016, "learning_rate": 9.98553758323994e-06, "loss": 0.9793, "step": 1515 }, { "epoch": 0.053547854421186016, "grad_norm": 1.8803428411483765, "learning_rate": 9.985494075605603e-06, "loss": 0.9307, "step": 1516 }, { "epoch": 0.053583176224893926, "grad_norm": 1.9343518018722534, "learning_rate": 9.985450502722055e-06, "loss": 0.9822, "step": 1517 }, { "epoch": 0.05361849802860183, "grad_norm": 1.7775624990463257, "learning_rate": 9.985406864589869e-06, "loss": 0.9163, "step": 1518 }, { "epoch": 0.05365381983230973, "grad_norm": 2.2441680431365967, "learning_rate": 9.985363161209613e-06, "loss": 0.9434, "step": 1519 }, { "epoch": 0.053689141636017644, "grad_norm": 1.9147448539733887, "learning_rate": 9.98531939258186e-06, "loss": 0.932, "step": 1520 }, { "epoch": 0.05372446343972555, "grad_norm": 2.2623605728149414, "learning_rate": 9.985275558707188e-06, "loss": 0.9431, "step": 1521 }, { "epoch": 0.05375978524343346, "grad_norm": 1.918995976448059, "learning_rate": 9.985231659586164e-06, "loss": 0.9237, "step": 1522 }, { "epoch": 0.05379510704714136, "grad_norm": 1.864518165588379, "learning_rate": 9.985187695219366e-06, "loss": 0.9332, "step": 1523 }, { "epoch": 0.053830428850849266, "grad_norm": 2.1961658000946045, "learning_rate": 9.985143665607368e-06, "loss": 0.9569, "step": 1524 }, { "epoch": 0.053865750654557176, "grad_norm": 1.923466444015503, "learning_rate": 9.985099570750747e-06, "loss": 0.9021, "step": 1525 }, { "epoch": 0.05390107245826508, "grad_norm": 1.8273601531982422, "learning_rate": 9.985055410650081e-06, "loss": 0.9668, "step": 1526 }, { "epoch": 0.05393639426197299, "grad_norm": 1.9764281511306763, "learning_rate": 9.985011185305947e-06, "loss": 0.9492, "step": 1527 }, { "epoch": 0.053971716065680894, "grad_norm": 1.7592074871063232, "learning_rate": 9.984966894718922e-06, "loss": 0.8964, "step": 1528 }, { "epoch": 0.0540070378693888, "grad_norm": 1.770330548286438, "learning_rate": 9.984922538889588e-06, "loss": 0.9028, "step": 1529 }, { "epoch": 0.05404235967309671, "grad_norm": 1.6444379091262817, "learning_rate": 9.984878117818524e-06, "loss": 0.6198, "step": 1530 }, { "epoch": 0.05407768147680461, "grad_norm": 1.903709053993225, "learning_rate": 9.984833631506315e-06, "loss": 0.9462, "step": 1531 }, { "epoch": 0.05411300328051252, "grad_norm": 2.1076161861419678, "learning_rate": 9.984789079953537e-06, "loss": 0.9484, "step": 1532 }, { "epoch": 0.054148325084220426, "grad_norm": 1.9355167150497437, "learning_rate": 9.984744463160778e-06, "loss": 0.9714, "step": 1533 }, { "epoch": 0.05418364688792833, "grad_norm": 1.7627078294754028, "learning_rate": 9.984699781128622e-06, "loss": 0.9457, "step": 1534 }, { "epoch": 0.05421896869163624, "grad_norm": 1.8876060247421265, "learning_rate": 9.98465503385765e-06, "loss": 0.9482, "step": 1535 }, { "epoch": 0.054254290495344144, "grad_norm": 1.997531771659851, "learning_rate": 9.98461022134845e-06, "loss": 0.9333, "step": 1536 }, { "epoch": 0.05428961229905205, "grad_norm": 1.8184539079666138, "learning_rate": 9.98456534360161e-06, "loss": 0.9216, "step": 1537 }, { "epoch": 0.05432493410275996, "grad_norm": 1.7886922359466553, "learning_rate": 9.984520400617716e-06, "loss": 0.9298, "step": 1538 }, { "epoch": 0.05436025590646786, "grad_norm": 1.876744270324707, "learning_rate": 9.984475392397355e-06, "loss": 0.9269, "step": 1539 }, { "epoch": 0.05439557771017577, "grad_norm": 1.885982632637024, "learning_rate": 9.984430318941116e-06, "loss": 0.932, "step": 1540 }, { "epoch": 0.05443089951388368, "grad_norm": 1.8830848932266235, "learning_rate": 9.984385180249591e-06, "loss": 0.9197, "step": 1541 }, { "epoch": 0.05446622131759158, "grad_norm": 1.702519416809082, "learning_rate": 9.984339976323369e-06, "loss": 0.9165, "step": 1542 }, { "epoch": 0.05450154312129949, "grad_norm": 1.822994589805603, "learning_rate": 9.984294707163044e-06, "loss": 0.8958, "step": 1543 }, { "epoch": 0.054536864925007394, "grad_norm": 1.818505048751831, "learning_rate": 9.984249372769206e-06, "loss": 0.9429, "step": 1544 }, { "epoch": 0.054572186728715305, "grad_norm": 1.8682591915130615, "learning_rate": 9.98420397314245e-06, "loss": 0.9159, "step": 1545 }, { "epoch": 0.05460750853242321, "grad_norm": 1.7495806217193604, "learning_rate": 9.984158508283369e-06, "loss": 0.9222, "step": 1546 }, { "epoch": 0.05464283033613111, "grad_norm": 1.9314597845077515, "learning_rate": 9.984112978192558e-06, "loss": 0.9579, "step": 1547 }, { "epoch": 0.05467815213983902, "grad_norm": 1.7798038721084595, "learning_rate": 9.984067382870613e-06, "loss": 0.9185, "step": 1548 }, { "epoch": 0.05471347394354693, "grad_norm": 1.8357666730880737, "learning_rate": 9.984021722318129e-06, "loss": 0.9232, "step": 1549 }, { "epoch": 0.05474879574725483, "grad_norm": 1.9224605560302734, "learning_rate": 9.983975996535709e-06, "loss": 0.9067, "step": 1550 }, { "epoch": 0.05478411755096274, "grad_norm": 1.883701205253601, "learning_rate": 9.983930205523945e-06, "loss": 0.9086, "step": 1551 }, { "epoch": 0.054819439354670645, "grad_norm": 2.0626466274261475, "learning_rate": 9.983884349283442e-06, "loss": 0.9798, "step": 1552 }, { "epoch": 0.054854761158378555, "grad_norm": 1.853175163269043, "learning_rate": 9.983838427814796e-06, "loss": 0.9509, "step": 1553 }, { "epoch": 0.05489008296208646, "grad_norm": 1.9968410730361938, "learning_rate": 9.983792441118612e-06, "loss": 0.9744, "step": 1554 }, { "epoch": 0.05492540476579436, "grad_norm": 1.940564751625061, "learning_rate": 9.983746389195486e-06, "loss": 0.9198, "step": 1555 }, { "epoch": 0.05496072656950227, "grad_norm": 1.8834227323532104, "learning_rate": 9.983700272046026e-06, "loss": 0.9274, "step": 1556 }, { "epoch": 0.05499604837321018, "grad_norm": 1.7128658294677734, "learning_rate": 9.983654089670833e-06, "loss": 0.904, "step": 1557 }, { "epoch": 0.05503137017691809, "grad_norm": 1.7282692193984985, "learning_rate": 9.983607842070513e-06, "loss": 0.9523, "step": 1558 }, { "epoch": 0.05506669198062599, "grad_norm": 1.740864634513855, "learning_rate": 9.98356152924567e-06, "loss": 0.9055, "step": 1559 }, { "epoch": 0.055102013784333895, "grad_norm": 1.844557285308838, "learning_rate": 9.98351515119691e-06, "loss": 0.9441, "step": 1560 }, { "epoch": 0.055137335588041805, "grad_norm": 1.8533111810684204, "learning_rate": 9.98346870792484e-06, "loss": 0.9574, "step": 1561 }, { "epoch": 0.05517265739174971, "grad_norm": 1.7591232061386108, "learning_rate": 9.983422199430069e-06, "loss": 0.8895, "step": 1562 }, { "epoch": 0.05520797919545761, "grad_norm": 1.8516417741775513, "learning_rate": 9.983375625713206e-06, "loss": 0.9359, "step": 1563 }, { "epoch": 0.05524330099916552, "grad_norm": 1.9128601551055908, "learning_rate": 9.983328986774857e-06, "loss": 0.9356, "step": 1564 }, { "epoch": 0.05527862280287343, "grad_norm": 1.777256727218628, "learning_rate": 9.983282282615637e-06, "loss": 0.9372, "step": 1565 }, { "epoch": 0.05531394460658134, "grad_norm": 1.8442761898040771, "learning_rate": 9.983235513236155e-06, "loss": 0.9155, "step": 1566 }, { "epoch": 0.05534926641028924, "grad_norm": 1.8505254983901978, "learning_rate": 9.983188678637024e-06, "loss": 0.8805, "step": 1567 }, { "epoch": 0.055384588213997145, "grad_norm": 1.9411611557006836, "learning_rate": 9.983141778818855e-06, "loss": 0.9483, "step": 1568 }, { "epoch": 0.055419910017705055, "grad_norm": 1.7543507814407349, "learning_rate": 9.983094813782264e-06, "loss": 0.9263, "step": 1569 }, { "epoch": 0.05545523182141296, "grad_norm": 1.7159539461135864, "learning_rate": 9.983047783527864e-06, "loss": 0.9569, "step": 1570 }, { "epoch": 0.05549055362512087, "grad_norm": 2.8188676834106445, "learning_rate": 9.983000688056272e-06, "loss": 0.9416, "step": 1571 }, { "epoch": 0.05552587542882877, "grad_norm": 1.8259373903274536, "learning_rate": 9.982953527368104e-06, "loss": 0.9093, "step": 1572 }, { "epoch": 0.05556119723253668, "grad_norm": 1.740320086479187, "learning_rate": 9.982906301463975e-06, "loss": 0.9324, "step": 1573 }, { "epoch": 0.05559651903624459, "grad_norm": 1.7991029024124146, "learning_rate": 9.982859010344507e-06, "loss": 0.948, "step": 1574 }, { "epoch": 0.05563184083995249, "grad_norm": 2.0147275924682617, "learning_rate": 9.982811654010316e-06, "loss": 0.9366, "step": 1575 }, { "epoch": 0.0556671626436604, "grad_norm": 1.8899999856948853, "learning_rate": 9.982764232462023e-06, "loss": 0.9577, "step": 1576 }, { "epoch": 0.055702484447368306, "grad_norm": 1.9246360063552856, "learning_rate": 9.98271674570025e-06, "loss": 0.9502, "step": 1577 }, { "epoch": 0.05573780625107621, "grad_norm": 2.128286123275757, "learning_rate": 9.982669193725614e-06, "loss": 0.9518, "step": 1578 }, { "epoch": 0.05577312805478412, "grad_norm": 1.808301329612732, "learning_rate": 9.982621576538743e-06, "loss": 0.9277, "step": 1579 }, { "epoch": 0.055808449858492024, "grad_norm": 1.881839632987976, "learning_rate": 9.982573894140255e-06, "loss": 0.9145, "step": 1580 }, { "epoch": 0.05584377166219993, "grad_norm": 1.7824938297271729, "learning_rate": 9.982526146530778e-06, "loss": 0.934, "step": 1581 }, { "epoch": 0.05587909346590784, "grad_norm": 1.8089226484298706, "learning_rate": 9.982478333710935e-06, "loss": 0.9108, "step": 1582 }, { "epoch": 0.05591441526961574, "grad_norm": 2.210881471633911, "learning_rate": 9.982430455681352e-06, "loss": 0.9598, "step": 1583 }, { "epoch": 0.05594973707332365, "grad_norm": 1.903207778930664, "learning_rate": 9.982382512442655e-06, "loss": 0.9594, "step": 1584 }, { "epoch": 0.055985058877031556, "grad_norm": 1.8588007688522339, "learning_rate": 9.982334503995473e-06, "loss": 0.9485, "step": 1585 }, { "epoch": 0.05602038068073946, "grad_norm": 1.8299652338027954, "learning_rate": 9.982286430340435e-06, "loss": 0.9175, "step": 1586 }, { "epoch": 0.05605570248444737, "grad_norm": 1.8479933738708496, "learning_rate": 9.982238291478167e-06, "loss": 0.945, "step": 1587 }, { "epoch": 0.056091024288155274, "grad_norm": 1.8176006078720093, "learning_rate": 9.9821900874093e-06, "loss": 0.926, "step": 1588 }, { "epoch": 0.056126346091863184, "grad_norm": 1.3929634094238281, "learning_rate": 9.982141818134468e-06, "loss": 0.6556, "step": 1589 }, { "epoch": 0.05616166789557109, "grad_norm": 1.2419395446777344, "learning_rate": 9.9820934836543e-06, "loss": 0.6043, "step": 1590 }, { "epoch": 0.05619698969927899, "grad_norm": 2.3878655433654785, "learning_rate": 9.982045083969427e-06, "loss": 1.0496, "step": 1591 }, { "epoch": 0.0562323115029869, "grad_norm": 2.339339017868042, "learning_rate": 9.981996619080487e-06, "loss": 0.9527, "step": 1592 }, { "epoch": 0.056267633306694806, "grad_norm": 2.0407557487487793, "learning_rate": 9.98194808898811e-06, "loss": 0.9271, "step": 1593 }, { "epoch": 0.05630295511040271, "grad_norm": 1.295033574104309, "learning_rate": 9.981899493692934e-06, "loss": 0.5907, "step": 1594 }, { "epoch": 0.05633827691411062, "grad_norm": 2.6447224617004395, "learning_rate": 9.981850833195593e-06, "loss": 0.9248, "step": 1595 }, { "epoch": 0.056373598717818524, "grad_norm": 2.9724361896514893, "learning_rate": 9.981802107496725e-06, "loss": 0.9496, "step": 1596 }, { "epoch": 0.056408920521526434, "grad_norm": 2.2248551845550537, "learning_rate": 9.981753316596968e-06, "loss": 0.9553, "step": 1597 }, { "epoch": 0.05644424232523434, "grad_norm": 1.8663265705108643, "learning_rate": 9.98170446049696e-06, "loss": 0.9445, "step": 1598 }, { "epoch": 0.05647956412894224, "grad_norm": 2.316145181655884, "learning_rate": 9.981655539197342e-06, "loss": 0.9404, "step": 1599 }, { "epoch": 0.05651488593265015, "grad_norm": 2.495404005050659, "learning_rate": 9.98160655269875e-06, "loss": 0.9179, "step": 1600 }, { "epoch": 0.056550207736358056, "grad_norm": 1.9623879194259644, "learning_rate": 9.981557501001828e-06, "loss": 0.95, "step": 1601 }, { "epoch": 0.05658552954006597, "grad_norm": 1.8469792604446411, "learning_rate": 9.98150838410722e-06, "loss": 0.946, "step": 1602 }, { "epoch": 0.05662085134377387, "grad_norm": 2.5314643383026123, "learning_rate": 9.981459202015564e-06, "loss": 0.9597, "step": 1603 }, { "epoch": 0.056656173147481774, "grad_norm": 2.1342661380767822, "learning_rate": 9.981409954727508e-06, "loss": 0.9169, "step": 1604 }, { "epoch": 0.056691494951189685, "grad_norm": 1.3562532663345337, "learning_rate": 9.981360642243696e-06, "loss": 0.6046, "step": 1605 }, { "epoch": 0.05672681675489759, "grad_norm": 1.898640513420105, "learning_rate": 9.98131126456477e-06, "loss": 0.8957, "step": 1606 }, { "epoch": 0.05676213855860549, "grad_norm": 2.1659188270568848, "learning_rate": 9.98126182169138e-06, "loss": 0.9494, "step": 1607 }, { "epoch": 0.0567974603623134, "grad_norm": 1.9723291397094727, "learning_rate": 9.98121231362417e-06, "loss": 0.9001, "step": 1608 }, { "epoch": 0.056832782166021306, "grad_norm": 2.0845983028411865, "learning_rate": 9.981162740363792e-06, "loss": 0.954, "step": 1609 }, { "epoch": 0.05686810396972922, "grad_norm": 1.7786476612091064, "learning_rate": 9.981113101910892e-06, "loss": 0.9122, "step": 1610 }, { "epoch": 0.05690342577343712, "grad_norm": 1.7764252424240112, "learning_rate": 9.98106339826612e-06, "loss": 0.9635, "step": 1611 }, { "epoch": 0.056938747577145024, "grad_norm": 1.7602839469909668, "learning_rate": 9.981013629430126e-06, "loss": 0.8868, "step": 1612 }, { "epoch": 0.056974069380852935, "grad_norm": 1.9795044660568237, "learning_rate": 9.98096379540356e-06, "loss": 0.9431, "step": 1613 }, { "epoch": 0.05700939118456084, "grad_norm": 2.206389904022217, "learning_rate": 9.980913896187079e-06, "loss": 0.9584, "step": 1614 }, { "epoch": 0.05704471298826875, "grad_norm": 1.846515417098999, "learning_rate": 9.980863931781333e-06, "loss": 0.9412, "step": 1615 }, { "epoch": 0.05708003479197665, "grad_norm": 1.9159010648727417, "learning_rate": 9.980813902186974e-06, "loss": 0.9412, "step": 1616 }, { "epoch": 0.057115356595684556, "grad_norm": 2.0072667598724365, "learning_rate": 9.980763807404661e-06, "loss": 0.9197, "step": 1617 }, { "epoch": 0.05715067839939247, "grad_norm": 2.0744214057922363, "learning_rate": 9.980713647435047e-06, "loss": 0.9627, "step": 1618 }, { "epoch": 0.05718600020310037, "grad_norm": 1.7306188344955444, "learning_rate": 9.980663422278789e-06, "loss": 0.9116, "step": 1619 }, { "epoch": 0.057221322006808274, "grad_norm": 1.9925593137741089, "learning_rate": 9.980613131936545e-06, "loss": 0.9586, "step": 1620 }, { "epoch": 0.057256643810516185, "grad_norm": 1.99173903465271, "learning_rate": 9.980562776408972e-06, "loss": 0.9244, "step": 1621 }, { "epoch": 0.05729196561422409, "grad_norm": 1.89407479763031, "learning_rate": 9.980512355696729e-06, "loss": 0.9591, "step": 1622 }, { "epoch": 0.057327287417932, "grad_norm": 1.7631887197494507, "learning_rate": 9.980461869800477e-06, "loss": 0.918, "step": 1623 }, { "epoch": 0.0573626092216399, "grad_norm": 1.81754469871521, "learning_rate": 9.980411318720877e-06, "loss": 0.9195, "step": 1624 }, { "epoch": 0.057397931025347806, "grad_norm": 1.8520455360412598, "learning_rate": 9.98036070245859e-06, "loss": 0.9176, "step": 1625 }, { "epoch": 0.05743325282905572, "grad_norm": 1.7270941734313965, "learning_rate": 9.980310021014277e-06, "loss": 0.905, "step": 1626 }, { "epoch": 0.05746857463276362, "grad_norm": 1.8440498113632202, "learning_rate": 9.980259274388603e-06, "loss": 0.9344, "step": 1627 }, { "epoch": 0.05750389643647153, "grad_norm": 1.956961750984192, "learning_rate": 9.980208462582233e-06, "loss": 0.8915, "step": 1628 }, { "epoch": 0.057539218240179435, "grad_norm": 1.978684425354004, "learning_rate": 9.980157585595829e-06, "loss": 0.968, "step": 1629 }, { "epoch": 0.05757454004388734, "grad_norm": 1.858888030052185, "learning_rate": 9.980106643430058e-06, "loss": 0.9253, "step": 1630 }, { "epoch": 0.05760986184759525, "grad_norm": 1.7112654447555542, "learning_rate": 9.98005563608559e-06, "loss": 0.9126, "step": 1631 }, { "epoch": 0.05764518365130315, "grad_norm": 1.935694694519043, "learning_rate": 9.980004563563089e-06, "loss": 0.9406, "step": 1632 }, { "epoch": 0.057680505455011064, "grad_norm": 1.8566962480545044, "learning_rate": 9.979953425863225e-06, "loss": 0.8936, "step": 1633 }, { "epoch": 0.05771582725871897, "grad_norm": 1.8728218078613281, "learning_rate": 9.979902222986666e-06, "loss": 0.9444, "step": 1634 }, { "epoch": 0.05775114906242687, "grad_norm": 1.2738769054412842, "learning_rate": 9.979850954934083e-06, "loss": 0.6058, "step": 1635 }, { "epoch": 0.05778647086613478, "grad_norm": 2.103384256362915, "learning_rate": 9.979799621706147e-06, "loss": 0.9315, "step": 1636 }, { "epoch": 0.057821792669842685, "grad_norm": 1.8926962614059448, "learning_rate": 9.97974822330353e-06, "loss": 0.9485, "step": 1637 }, { "epoch": 0.05785711447355059, "grad_norm": 1.7247858047485352, "learning_rate": 9.979696759726903e-06, "loss": 0.9031, "step": 1638 }, { "epoch": 0.0578924362772585, "grad_norm": 1.9122203588485718, "learning_rate": 9.979645230976942e-06, "loss": 0.9243, "step": 1639 }, { "epoch": 0.0579277580809664, "grad_norm": 1.849788784980774, "learning_rate": 9.97959363705432e-06, "loss": 0.9311, "step": 1640 }, { "epoch": 0.057963079884674314, "grad_norm": 1.9129037857055664, "learning_rate": 9.979541977959711e-06, "loss": 0.9546, "step": 1641 }, { "epoch": 0.05799840168838222, "grad_norm": 1.9562634229660034, "learning_rate": 9.979490253693794e-06, "loss": 0.9324, "step": 1642 }, { "epoch": 0.05803372349209012, "grad_norm": 1.798912525177002, "learning_rate": 9.979438464257244e-06, "loss": 0.9249, "step": 1643 }, { "epoch": 0.05806904529579803, "grad_norm": 1.8434417247772217, "learning_rate": 9.97938660965074e-06, "loss": 0.8877, "step": 1644 }, { "epoch": 0.058104367099505935, "grad_norm": 1.860654592514038, "learning_rate": 9.979334689874962e-06, "loss": 0.9319, "step": 1645 }, { "epoch": 0.058139688903213846, "grad_norm": 1.8225411176681519, "learning_rate": 9.979282704930585e-06, "loss": 0.9391, "step": 1646 }, { "epoch": 0.05817501070692175, "grad_norm": 1.948703408241272, "learning_rate": 9.979230654818293e-06, "loss": 0.9062, "step": 1647 }, { "epoch": 0.05821033251062965, "grad_norm": 1.8620460033416748, "learning_rate": 9.979178539538765e-06, "loss": 0.9165, "step": 1648 }, { "epoch": 0.058245654314337564, "grad_norm": 1.971071481704712, "learning_rate": 9.979126359092686e-06, "loss": 0.9344, "step": 1649 }, { "epoch": 0.05828097611804547, "grad_norm": 2.0300512313842773, "learning_rate": 9.979074113480736e-06, "loss": 0.9141, "step": 1650 }, { "epoch": 0.05831629792175337, "grad_norm": 1.782050609588623, "learning_rate": 9.979021802703601e-06, "loss": 0.9117, "step": 1651 }, { "epoch": 0.05835161972546128, "grad_norm": 1.3732389211654663, "learning_rate": 9.978969426761964e-06, "loss": 0.5977, "step": 1652 }, { "epoch": 0.058386941529169185, "grad_norm": 2.1647591590881348, "learning_rate": 9.97891698565651e-06, "loss": 0.9173, "step": 1653 }, { "epoch": 0.058422263332877096, "grad_norm": 2.0144898891448975, "learning_rate": 9.978864479387927e-06, "loss": 0.9587, "step": 1654 }, { "epoch": 0.058457585136585, "grad_norm": 1.7196123600006104, "learning_rate": 9.978811907956902e-06, "loss": 0.9514, "step": 1655 }, { "epoch": 0.0584929069402929, "grad_norm": 1.7463082075119019, "learning_rate": 9.978759271364123e-06, "loss": 0.9216, "step": 1656 }, { "epoch": 0.058528228744000814, "grad_norm": 2.5356388092041016, "learning_rate": 9.97870656961028e-06, "loss": 0.9152, "step": 1657 }, { "epoch": 0.05856355054770872, "grad_norm": 1.9972442388534546, "learning_rate": 9.97865380269606e-06, "loss": 0.9507, "step": 1658 }, { "epoch": 0.05859887235141663, "grad_norm": 2.0260233879089355, "learning_rate": 9.978600970622152e-06, "loss": 0.9669, "step": 1659 }, { "epoch": 0.05863419415512453, "grad_norm": 2.197370767593384, "learning_rate": 9.978548073389254e-06, "loss": 0.9646, "step": 1660 }, { "epoch": 0.058669515958832436, "grad_norm": 2.011199474334717, "learning_rate": 9.978495110998053e-06, "loss": 0.9436, "step": 1661 }, { "epoch": 0.058704837762540346, "grad_norm": 1.9636526107788086, "learning_rate": 9.978442083449245e-06, "loss": 0.9324, "step": 1662 }, { "epoch": 0.05874015956624825, "grad_norm": 1.8087564706802368, "learning_rate": 9.978388990743521e-06, "loss": 0.9283, "step": 1663 }, { "epoch": 0.05877548136995615, "grad_norm": 1.9652029275894165, "learning_rate": 9.97833583288158e-06, "loss": 0.9399, "step": 1664 }, { "epoch": 0.058810803173664064, "grad_norm": 2.1006476879119873, "learning_rate": 9.978282609864113e-06, "loss": 0.9262, "step": 1665 }, { "epoch": 0.05884612497737197, "grad_norm": 1.7612839937210083, "learning_rate": 9.978229321691822e-06, "loss": 0.9247, "step": 1666 }, { "epoch": 0.05888144678107988, "grad_norm": 1.7976152896881104, "learning_rate": 9.9781759683654e-06, "loss": 0.9137, "step": 1667 }, { "epoch": 0.05891676858478778, "grad_norm": 1.9056639671325684, "learning_rate": 9.978122549885546e-06, "loss": 0.9542, "step": 1668 }, { "epoch": 0.058952090388495686, "grad_norm": 1.742702841758728, "learning_rate": 9.97806906625296e-06, "loss": 0.8968, "step": 1669 }, { "epoch": 0.058987412192203596, "grad_norm": 2.1130259037017822, "learning_rate": 9.978015517468343e-06, "loss": 0.8954, "step": 1670 }, { "epoch": 0.0590227339959115, "grad_norm": 1.645154356956482, "learning_rate": 9.977961903532395e-06, "loss": 0.9165, "step": 1671 }, { "epoch": 0.05905805579961941, "grad_norm": 1.715266466140747, "learning_rate": 9.977908224445815e-06, "loss": 0.9199, "step": 1672 }, { "epoch": 0.059093377603327314, "grad_norm": 1.7885648012161255, "learning_rate": 9.97785448020931e-06, "loss": 0.9206, "step": 1673 }, { "epoch": 0.05912869940703522, "grad_norm": 2.249072551727295, "learning_rate": 9.977800670823578e-06, "loss": 0.9177, "step": 1674 }, { "epoch": 0.05916402121074313, "grad_norm": 1.8963066339492798, "learning_rate": 9.97774679628933e-06, "loss": 0.9406, "step": 1675 }, { "epoch": 0.05919934301445103, "grad_norm": 1.8455945253372192, "learning_rate": 9.977692856607268e-06, "loss": 0.9662, "step": 1676 }, { "epoch": 0.05923466481815894, "grad_norm": 1.8251436948776245, "learning_rate": 9.977638851778095e-06, "loss": 0.9433, "step": 1677 }, { "epoch": 0.059269986621866846, "grad_norm": 1.8348627090454102, "learning_rate": 9.977584781802524e-06, "loss": 0.9681, "step": 1678 }, { "epoch": 0.05930530842557475, "grad_norm": 1.953618049621582, "learning_rate": 9.977530646681255e-06, "loss": 0.9344, "step": 1679 }, { "epoch": 0.05934063022928266, "grad_norm": 1.8826688528060913, "learning_rate": 9.977476446415001e-06, "loss": 0.9262, "step": 1680 }, { "epoch": 0.059375952032990564, "grad_norm": 1.8453022241592407, "learning_rate": 9.977422181004473e-06, "loss": 0.9063, "step": 1681 }, { "epoch": 0.05941127383669847, "grad_norm": 1.7800936698913574, "learning_rate": 9.97736785045038e-06, "loss": 0.9025, "step": 1682 }, { "epoch": 0.05944659564040638, "grad_norm": 1.9069504737854004, "learning_rate": 9.97731345475343e-06, "loss": 0.9117, "step": 1683 }, { "epoch": 0.05948191744411428, "grad_norm": 2.0127780437469482, "learning_rate": 9.977258993914337e-06, "loss": 0.9025, "step": 1684 }, { "epoch": 0.05951723924782219, "grad_norm": 1.9800803661346436, "learning_rate": 9.977204467933815e-06, "loss": 0.9246, "step": 1685 }, { "epoch": 0.0595525610515301, "grad_norm": 1.8740856647491455, "learning_rate": 9.977149876812575e-06, "loss": 0.9257, "step": 1686 }, { "epoch": 0.059587882855238, "grad_norm": 1.8152852058410645, "learning_rate": 9.977095220551334e-06, "loss": 0.9261, "step": 1687 }, { "epoch": 0.05962320465894591, "grad_norm": 1.9786638021469116, "learning_rate": 9.977040499150807e-06, "loss": 0.927, "step": 1688 }, { "epoch": 0.059658526462653814, "grad_norm": 1.8240395784378052, "learning_rate": 9.97698571261171e-06, "loss": 0.9145, "step": 1689 }, { "epoch": 0.059693848266361725, "grad_norm": 2.118248462677002, "learning_rate": 9.97693086093476e-06, "loss": 0.9461, "step": 1690 }, { "epoch": 0.05972917007006963, "grad_norm": 2.0304653644561768, "learning_rate": 9.976875944120673e-06, "loss": 0.9182, "step": 1691 }, { "epoch": 0.05976449187377753, "grad_norm": 1.7569425106048584, "learning_rate": 9.976820962170168e-06, "loss": 0.888, "step": 1692 }, { "epoch": 0.05979981367748544, "grad_norm": 1.824305772781372, "learning_rate": 9.976765915083967e-06, "loss": 0.8858, "step": 1693 }, { "epoch": 0.05983513548119335, "grad_norm": 1.9419986009597778, "learning_rate": 9.97671080286279e-06, "loss": 0.9723, "step": 1694 }, { "epoch": 0.05987045728490125, "grad_norm": 2.1003243923187256, "learning_rate": 9.976655625507356e-06, "loss": 0.9261, "step": 1695 }, { "epoch": 0.05990577908860916, "grad_norm": 2.3233273029327393, "learning_rate": 9.976600383018389e-06, "loss": 0.9444, "step": 1696 }, { "epoch": 0.059941100892317065, "grad_norm": 1.834836483001709, "learning_rate": 9.976545075396613e-06, "loss": 0.9077, "step": 1697 }, { "epoch": 0.059976422696024975, "grad_norm": 1.9310245513916016, "learning_rate": 9.97648970264275e-06, "loss": 0.9064, "step": 1698 }, { "epoch": 0.06001174449973288, "grad_norm": 1.846811056137085, "learning_rate": 9.976434264757523e-06, "loss": 0.9101, "step": 1699 }, { "epoch": 0.06004706630344078, "grad_norm": 2.0043818950653076, "learning_rate": 9.976378761741662e-06, "loss": 0.9335, "step": 1700 }, { "epoch": 0.06008238810714869, "grad_norm": 1.904567837715149, "learning_rate": 9.976323193595891e-06, "loss": 0.9295, "step": 1701 }, { "epoch": 0.0601177099108566, "grad_norm": 2.04067063331604, "learning_rate": 9.976267560320939e-06, "loss": 0.9221, "step": 1702 }, { "epoch": 0.06015303171456451, "grad_norm": 2.167438507080078, "learning_rate": 9.976211861917532e-06, "loss": 0.9049, "step": 1703 }, { "epoch": 0.06018835351827241, "grad_norm": 1.9290794134140015, "learning_rate": 9.976156098386398e-06, "loss": 0.9146, "step": 1704 }, { "epoch": 0.060223675321980315, "grad_norm": 1.8682676553726196, "learning_rate": 9.976100269728268e-06, "loss": 0.9306, "step": 1705 }, { "epoch": 0.060258997125688225, "grad_norm": 1.8711919784545898, "learning_rate": 9.976044375943875e-06, "loss": 0.9142, "step": 1706 }, { "epoch": 0.06029431892939613, "grad_norm": 2.0078272819519043, "learning_rate": 9.975988417033949e-06, "loss": 0.9266, "step": 1707 }, { "epoch": 0.06032964073310403, "grad_norm": 1.7616312503814697, "learning_rate": 9.97593239299922e-06, "loss": 0.9243, "step": 1708 }, { "epoch": 0.06036496253681194, "grad_norm": 1.8906420469284058, "learning_rate": 9.975876303840423e-06, "loss": 0.9394, "step": 1709 }, { "epoch": 0.06040028434051985, "grad_norm": 1.886036992073059, "learning_rate": 9.975820149558293e-06, "loss": 0.8676, "step": 1710 }, { "epoch": 0.06043560614422776, "grad_norm": 2.266735792160034, "learning_rate": 9.975763930153565e-06, "loss": 0.9451, "step": 1711 }, { "epoch": 0.06047092794793566, "grad_norm": 1.9013643264770508, "learning_rate": 9.975707645626974e-06, "loss": 0.903, "step": 1712 }, { "epoch": 0.060506249751643565, "grad_norm": 2.018486738204956, "learning_rate": 9.975651295979256e-06, "loss": 0.9463, "step": 1713 }, { "epoch": 0.060541571555351475, "grad_norm": 1.9316234588623047, "learning_rate": 9.975594881211149e-06, "loss": 0.9572, "step": 1714 }, { "epoch": 0.06057689335905938, "grad_norm": 1.9363079071044922, "learning_rate": 9.975538401323392e-06, "loss": 0.9444, "step": 1715 }, { "epoch": 0.06061221516276729, "grad_norm": 1.8623656034469604, "learning_rate": 9.975481856316724e-06, "loss": 0.9357, "step": 1716 }, { "epoch": 0.06064753696647519, "grad_norm": 1.7568379640579224, "learning_rate": 9.975425246191884e-06, "loss": 0.9834, "step": 1717 }, { "epoch": 0.0606828587701831, "grad_norm": 1.8187180757522583, "learning_rate": 9.975368570949614e-06, "loss": 0.9346, "step": 1718 }, { "epoch": 0.06071818057389101, "grad_norm": 1.7051341533660889, "learning_rate": 9.975311830590655e-06, "loss": 0.9371, "step": 1719 }, { "epoch": 0.06075350237759891, "grad_norm": 1.8066741228103638, "learning_rate": 9.975255025115751e-06, "loss": 0.9259, "step": 1720 }, { "epoch": 0.060788824181306815, "grad_norm": 1.8448723554611206, "learning_rate": 9.975198154525644e-06, "loss": 0.9485, "step": 1721 }, { "epoch": 0.060824145985014726, "grad_norm": 1.8037928342819214, "learning_rate": 9.975141218821078e-06, "loss": 0.9603, "step": 1722 }, { "epoch": 0.06085946778872263, "grad_norm": 1.6095211505889893, "learning_rate": 9.9750842180028e-06, "loss": 0.8652, "step": 1723 }, { "epoch": 0.06089478959243054, "grad_norm": 1.7436754703521729, "learning_rate": 9.975027152071554e-06, "loss": 0.9247, "step": 1724 }, { "epoch": 0.060930111396138444, "grad_norm": 1.7025952339172363, "learning_rate": 9.974970021028089e-06, "loss": 0.8942, "step": 1725 }, { "epoch": 0.06096543319984635, "grad_norm": 1.9411929845809937, "learning_rate": 9.97491282487315e-06, "loss": 0.9348, "step": 1726 }, { "epoch": 0.06100075500355426, "grad_norm": 1.7257333993911743, "learning_rate": 9.974855563607488e-06, "loss": 0.919, "step": 1727 }, { "epoch": 0.06103607680726216, "grad_norm": 1.902626872062683, "learning_rate": 9.974798237231852e-06, "loss": 0.9331, "step": 1728 }, { "epoch": 0.06107139861097007, "grad_norm": 1.8252092599868774, "learning_rate": 9.974740845746993e-06, "loss": 0.9068, "step": 1729 }, { "epoch": 0.061106720414677976, "grad_norm": 1.8164323568344116, "learning_rate": 9.97468338915366e-06, "loss": 0.9284, "step": 1730 }, { "epoch": 0.06114204221838588, "grad_norm": 2.146127223968506, "learning_rate": 9.974625867452604e-06, "loss": 0.9532, "step": 1731 }, { "epoch": 0.06117736402209379, "grad_norm": 1.8558051586151123, "learning_rate": 9.97456828064458e-06, "loss": 0.93, "step": 1732 }, { "epoch": 0.061212685825801694, "grad_norm": 1.6722300052642822, "learning_rate": 9.974510628730344e-06, "loss": 0.8913, "step": 1733 }, { "epoch": 0.061248007629509604, "grad_norm": 1.8885194063186646, "learning_rate": 9.974452911710646e-06, "loss": 0.9447, "step": 1734 }, { "epoch": 0.06128332943321751, "grad_norm": 1.784645438194275, "learning_rate": 9.974395129586244e-06, "loss": 0.91, "step": 1735 }, { "epoch": 0.06131865123692541, "grad_norm": 1.7544288635253906, "learning_rate": 9.974337282357892e-06, "loss": 0.8987, "step": 1736 }, { "epoch": 0.06135397304063332, "grad_norm": 1.8921281099319458, "learning_rate": 9.974279370026351e-06, "loss": 0.9509, "step": 1737 }, { "epoch": 0.061389294844341226, "grad_norm": 1.84134042263031, "learning_rate": 9.974221392592377e-06, "loss": 0.9254, "step": 1738 }, { "epoch": 0.06142461664804913, "grad_norm": 1.922099232673645, "learning_rate": 9.974163350056729e-06, "loss": 0.9309, "step": 1739 }, { "epoch": 0.06145993845175704, "grad_norm": 1.9794530868530273, "learning_rate": 9.974105242420164e-06, "loss": 0.8991, "step": 1740 }, { "epoch": 0.061495260255464944, "grad_norm": 1.6376087665557861, "learning_rate": 9.974047069683444e-06, "loss": 0.917, "step": 1741 }, { "epoch": 0.061530582059172854, "grad_norm": 1.8822885751724243, "learning_rate": 9.973988831847332e-06, "loss": 0.9439, "step": 1742 }, { "epoch": 0.06156590386288076, "grad_norm": 1.7893913984298706, "learning_rate": 9.97393052891259e-06, "loss": 0.9288, "step": 1743 }, { "epoch": 0.06160122566658866, "grad_norm": 1.9941397905349731, "learning_rate": 9.973872160879981e-06, "loss": 0.9357, "step": 1744 }, { "epoch": 0.06163654747029657, "grad_norm": 1.845123052597046, "learning_rate": 9.973813727750266e-06, "loss": 0.9332, "step": 1745 }, { "epoch": 0.061671869274004476, "grad_norm": 1.8597139120101929, "learning_rate": 9.973755229524212e-06, "loss": 0.9579, "step": 1746 }, { "epoch": 0.06170719107771239, "grad_norm": 1.8229877948760986, "learning_rate": 9.973696666202585e-06, "loss": 0.9067, "step": 1747 }, { "epoch": 0.06174251288142029, "grad_norm": 1.9363408088684082, "learning_rate": 9.973638037786151e-06, "loss": 0.9534, "step": 1748 }, { "epoch": 0.061777834685128194, "grad_norm": 1.6481932401657104, "learning_rate": 9.973579344275677e-06, "loss": 0.8764, "step": 1749 }, { "epoch": 0.061813156488836105, "grad_norm": 1.70127272605896, "learning_rate": 9.973520585671933e-06, "loss": 0.9245, "step": 1750 }, { "epoch": 0.06184847829254401, "grad_norm": 1.743006944656372, "learning_rate": 9.973461761975685e-06, "loss": 0.9344, "step": 1751 }, { "epoch": 0.06188380009625191, "grad_norm": 1.698057770729065, "learning_rate": 9.973402873187706e-06, "loss": 0.9189, "step": 1752 }, { "epoch": 0.06191912189995982, "grad_norm": 1.7413004636764526, "learning_rate": 9.973343919308763e-06, "loss": 0.9113, "step": 1753 }, { "epoch": 0.061954443703667726, "grad_norm": 2.586045265197754, "learning_rate": 9.97328490033963e-06, "loss": 0.9349, "step": 1754 }, { "epoch": 0.06198976550737564, "grad_norm": 1.9049164056777954, "learning_rate": 9.97322581628108e-06, "loss": 0.9038, "step": 1755 }, { "epoch": 0.06202508731108354, "grad_norm": 1.832501769065857, "learning_rate": 9.973166667133886e-06, "loss": 0.9769, "step": 1756 }, { "epoch": 0.062060409114791444, "grad_norm": 1.8192667961120605, "learning_rate": 9.97310745289882e-06, "loss": 0.9214, "step": 1757 }, { "epoch": 0.062095730918499355, "grad_norm": 1.4337241649627686, "learning_rate": 9.973048173576659e-06, "loss": 0.5901, "step": 1758 }, { "epoch": 0.06213105272220726, "grad_norm": 2.064716339111328, "learning_rate": 9.972988829168177e-06, "loss": 0.8973, "step": 1759 }, { "epoch": 0.06216637452591517, "grad_norm": 1.962764859199524, "learning_rate": 9.972929419674156e-06, "loss": 0.9186, "step": 1760 }, { "epoch": 0.06220169632962307, "grad_norm": 1.7883564233779907, "learning_rate": 9.972869945095365e-06, "loss": 0.9166, "step": 1761 }, { "epoch": 0.062237018133330976, "grad_norm": 1.9664424657821655, "learning_rate": 9.972810405432589e-06, "loss": 0.9633, "step": 1762 }, { "epoch": 0.06227233993703889, "grad_norm": 2.121384859085083, "learning_rate": 9.972750800686606e-06, "loss": 0.9112, "step": 1763 }, { "epoch": 0.06230766174074679, "grad_norm": 1.8147406578063965, "learning_rate": 9.972691130858194e-06, "loss": 0.8843, "step": 1764 }, { "epoch": 0.062342983544454694, "grad_norm": 1.9827433824539185, "learning_rate": 9.972631395948136e-06, "loss": 0.9048, "step": 1765 }, { "epoch": 0.062378305348162605, "grad_norm": 2.1015939712524414, "learning_rate": 9.972571595957212e-06, "loss": 0.9655, "step": 1766 }, { "epoch": 0.06241362715187051, "grad_norm": 1.8923085927963257, "learning_rate": 9.972511730886207e-06, "loss": 0.9301, "step": 1767 }, { "epoch": 0.06244894895557842, "grad_norm": 1.9390621185302734, "learning_rate": 9.972451800735903e-06, "loss": 0.9112, "step": 1768 }, { "epoch": 0.06248427075928632, "grad_norm": 1.9703447818756104, "learning_rate": 9.972391805507084e-06, "loss": 0.9223, "step": 1769 }, { "epoch": 0.06251959256299423, "grad_norm": 1.8229906558990479, "learning_rate": 9.972331745200536e-06, "loss": 0.9296, "step": 1770 }, { "epoch": 0.06255491436670213, "grad_norm": 1.8454716205596924, "learning_rate": 9.972271619817044e-06, "loss": 0.96, "step": 1771 }, { "epoch": 0.06259023617041004, "grad_norm": 1.989412784576416, "learning_rate": 9.972211429357397e-06, "loss": 0.9278, "step": 1772 }, { "epoch": 0.06262555797411795, "grad_norm": 1.7823299169540405, "learning_rate": 9.97215117382238e-06, "loss": 0.9526, "step": 1773 }, { "epoch": 0.06266087977782585, "grad_norm": 1.7694376707077026, "learning_rate": 9.972090853212784e-06, "loss": 0.9174, "step": 1774 }, { "epoch": 0.06269620158153376, "grad_norm": 1.8413572311401367, "learning_rate": 9.972030467529399e-06, "loss": 0.9086, "step": 1775 }, { "epoch": 0.06273152338524167, "grad_norm": 1.764097809791565, "learning_rate": 9.97197001677301e-06, "loss": 0.9062, "step": 1776 }, { "epoch": 0.06276684518894958, "grad_norm": 1.890869379043579, "learning_rate": 9.971909500944418e-06, "loss": 0.9222, "step": 1777 }, { "epoch": 0.06280216699265748, "grad_norm": 1.8221180438995361, "learning_rate": 9.971848920044405e-06, "loss": 0.8831, "step": 1778 }, { "epoch": 0.06283748879636539, "grad_norm": 1.935787558555603, "learning_rate": 9.971788274073769e-06, "loss": 0.9427, "step": 1779 }, { "epoch": 0.0628728106000733, "grad_norm": 1.7097907066345215, "learning_rate": 9.9717275630333e-06, "loss": 0.8871, "step": 1780 }, { "epoch": 0.0629081324037812, "grad_norm": 1.6667624711990356, "learning_rate": 9.971666786923798e-06, "loss": 0.8814, "step": 1781 }, { "epoch": 0.0629434542074891, "grad_norm": 1.785264015197754, "learning_rate": 9.971605945746055e-06, "loss": 0.9159, "step": 1782 }, { "epoch": 0.06297877601119702, "grad_norm": 1.8388975858688354, "learning_rate": 9.971545039500869e-06, "loss": 0.9304, "step": 1783 }, { "epoch": 0.06301409781490491, "grad_norm": 1.844496488571167, "learning_rate": 9.971484068189034e-06, "loss": 0.9122, "step": 1784 }, { "epoch": 0.06304941961861282, "grad_norm": 1.9162068367004395, "learning_rate": 9.971423031811351e-06, "loss": 0.9002, "step": 1785 }, { "epoch": 0.06308474142232073, "grad_norm": 1.7548894882202148, "learning_rate": 9.97136193036862e-06, "loss": 0.9124, "step": 1786 }, { "epoch": 0.06312006322602863, "grad_norm": 1.7777196168899536, "learning_rate": 9.971300763861636e-06, "loss": 0.9265, "step": 1787 }, { "epoch": 0.06315538502973654, "grad_norm": 1.9992694854736328, "learning_rate": 9.971239532291202e-06, "loss": 0.9195, "step": 1788 }, { "epoch": 0.06319070683344445, "grad_norm": 1.9222179651260376, "learning_rate": 9.97117823565812e-06, "loss": 0.9558, "step": 1789 }, { "epoch": 0.06322602863715236, "grad_norm": 1.9583115577697754, "learning_rate": 9.971116873963191e-06, "loss": 0.9447, "step": 1790 }, { "epoch": 0.06326135044086026, "grad_norm": 1.8094666004180908, "learning_rate": 9.971055447207222e-06, "loss": 0.8979, "step": 1791 }, { "epoch": 0.06329667224456817, "grad_norm": 1.904240608215332, "learning_rate": 9.97099395539101e-06, "loss": 0.9483, "step": 1792 }, { "epoch": 0.06333199404827608, "grad_norm": 1.7443090677261353, "learning_rate": 9.970932398515365e-06, "loss": 0.875, "step": 1793 }, { "epoch": 0.06336731585198398, "grad_norm": 1.6861921548843384, "learning_rate": 9.970870776581091e-06, "loss": 0.9222, "step": 1794 }, { "epoch": 0.06340263765569189, "grad_norm": 1.838716983795166, "learning_rate": 9.970809089588995e-06, "loss": 0.9218, "step": 1795 }, { "epoch": 0.0634379594593998, "grad_norm": 2.160618305206299, "learning_rate": 9.970747337539886e-06, "loss": 0.894, "step": 1796 }, { "epoch": 0.0634732812631077, "grad_norm": 1.9216501712799072, "learning_rate": 9.970685520434566e-06, "loss": 0.9138, "step": 1797 }, { "epoch": 0.0635086030668156, "grad_norm": 1.8389770984649658, "learning_rate": 9.970623638273851e-06, "loss": 0.9584, "step": 1798 }, { "epoch": 0.06354392487052352, "grad_norm": 1.9933526515960693, "learning_rate": 9.97056169105855e-06, "loss": 0.9091, "step": 1799 }, { "epoch": 0.06357924667423141, "grad_norm": 1.76357901096344, "learning_rate": 9.970499678789467e-06, "loss": 0.9102, "step": 1800 }, { "epoch": 0.06361456847793932, "grad_norm": 1.9584935903549194, "learning_rate": 9.970437601467421e-06, "loss": 0.9248, "step": 1801 }, { "epoch": 0.06364989028164723, "grad_norm": 2.214224338531494, "learning_rate": 9.970375459093225e-06, "loss": 0.897, "step": 1802 }, { "epoch": 0.06368521208535514, "grad_norm": 1.805284857749939, "learning_rate": 9.970313251667686e-06, "loss": 0.9341, "step": 1803 }, { "epoch": 0.06372053388906304, "grad_norm": 1.6981264352798462, "learning_rate": 9.970250979191622e-06, "loss": 0.9395, "step": 1804 }, { "epoch": 0.06375585569277095, "grad_norm": 1.7766755819320679, "learning_rate": 9.970188641665847e-06, "loss": 0.8802, "step": 1805 }, { "epoch": 0.06379117749647886, "grad_norm": 1.868263602256775, "learning_rate": 9.970126239091178e-06, "loss": 0.9345, "step": 1806 }, { "epoch": 0.06382649930018676, "grad_norm": 1.811094045639038, "learning_rate": 9.97006377146843e-06, "loss": 0.9171, "step": 1807 }, { "epoch": 0.06386182110389467, "grad_norm": 2.051579713821411, "learning_rate": 9.970001238798424e-06, "loss": 0.9275, "step": 1808 }, { "epoch": 0.06389714290760258, "grad_norm": 2.1081466674804688, "learning_rate": 9.969938641081975e-06, "loss": 0.9926, "step": 1809 }, { "epoch": 0.06393246471131048, "grad_norm": 1.8534455299377441, "learning_rate": 9.969875978319902e-06, "loss": 0.9071, "step": 1810 }, { "epoch": 0.06396778651501839, "grad_norm": 2.005218505859375, "learning_rate": 9.969813250513028e-06, "loss": 0.9085, "step": 1811 }, { "epoch": 0.0640031083187263, "grad_norm": 1.9961813688278198, "learning_rate": 9.969750457662171e-06, "loss": 0.9176, "step": 1812 }, { "epoch": 0.06403843012243421, "grad_norm": 1.8337658643722534, "learning_rate": 9.969687599768155e-06, "loss": 0.9138, "step": 1813 }, { "epoch": 0.0640737519261421, "grad_norm": 1.8520547151565552, "learning_rate": 9.969624676831801e-06, "loss": 0.9148, "step": 1814 }, { "epoch": 0.06410907372985002, "grad_norm": 1.8591840267181396, "learning_rate": 9.969561688853936e-06, "loss": 0.9091, "step": 1815 }, { "epoch": 0.06414439553355793, "grad_norm": 1.7456119060516357, "learning_rate": 9.96949863583538e-06, "loss": 0.9115, "step": 1816 }, { "epoch": 0.06417971733726582, "grad_norm": 1.8803391456604004, "learning_rate": 9.96943551777696e-06, "loss": 0.9277, "step": 1817 }, { "epoch": 0.06421503914097373, "grad_norm": 1.753326416015625, "learning_rate": 9.969372334679502e-06, "loss": 0.9062, "step": 1818 }, { "epoch": 0.06425036094468164, "grad_norm": 1.810172438621521, "learning_rate": 9.969309086543834e-06, "loss": 0.8983, "step": 1819 }, { "epoch": 0.06428568274838954, "grad_norm": 1.7609981298446655, "learning_rate": 9.969245773370782e-06, "loss": 0.9192, "step": 1820 }, { "epoch": 0.06432100455209745, "grad_norm": 1.7686305046081543, "learning_rate": 9.969182395161175e-06, "loss": 0.9111, "step": 1821 }, { "epoch": 0.06435632635580536, "grad_norm": 1.8066036701202393, "learning_rate": 9.969118951915843e-06, "loss": 0.9565, "step": 1822 }, { "epoch": 0.06439164815951326, "grad_norm": 2.0352509021759033, "learning_rate": 9.969055443635616e-06, "loss": 0.8989, "step": 1823 }, { "epoch": 0.06442696996322117, "grad_norm": 1.7809139490127563, "learning_rate": 9.968991870321328e-06, "loss": 0.9027, "step": 1824 }, { "epoch": 0.06446229176692908, "grad_norm": 1.6653608083724976, "learning_rate": 9.968928231973805e-06, "loss": 0.9034, "step": 1825 }, { "epoch": 0.06449761357063699, "grad_norm": 1.761703610420227, "learning_rate": 9.968864528593886e-06, "loss": 0.9243, "step": 1826 }, { "epoch": 0.06453293537434489, "grad_norm": 1.781698226928711, "learning_rate": 9.9688007601824e-06, "loss": 0.9354, "step": 1827 }, { "epoch": 0.0645682571780528, "grad_norm": 1.7750746011734009, "learning_rate": 9.968736926740186e-06, "loss": 0.9337, "step": 1828 }, { "epoch": 0.06460357898176071, "grad_norm": 1.864699363708496, "learning_rate": 9.968673028268074e-06, "loss": 0.9427, "step": 1829 }, { "epoch": 0.0646389007854686, "grad_norm": 1.9887864589691162, "learning_rate": 9.968609064766907e-06, "loss": 0.9787, "step": 1830 }, { "epoch": 0.06467422258917652, "grad_norm": 1.7790486812591553, "learning_rate": 9.968545036237515e-06, "loss": 0.9635, "step": 1831 }, { "epoch": 0.06470954439288443, "grad_norm": 1.750568151473999, "learning_rate": 9.968480942680743e-06, "loss": 0.8791, "step": 1832 }, { "epoch": 0.06474486619659232, "grad_norm": 2.0161232948303223, "learning_rate": 9.968416784097425e-06, "loss": 0.9409, "step": 1833 }, { "epoch": 0.06478018800030023, "grad_norm": 1.4089094400405884, "learning_rate": 9.968352560488401e-06, "loss": 0.6058, "step": 1834 }, { "epoch": 0.06481550980400815, "grad_norm": 2.0384795665740967, "learning_rate": 9.968288271854514e-06, "loss": 0.9424, "step": 1835 }, { "epoch": 0.06485083160771604, "grad_norm": 2.414964199066162, "learning_rate": 9.968223918196603e-06, "loss": 0.9132, "step": 1836 }, { "epoch": 0.06488615341142395, "grad_norm": 2.069005250930786, "learning_rate": 9.968159499515512e-06, "loss": 0.9316, "step": 1837 }, { "epoch": 0.06492147521513186, "grad_norm": 1.8465203046798706, "learning_rate": 9.968095015812082e-06, "loss": 0.9209, "step": 1838 }, { "epoch": 0.06495679701883977, "grad_norm": 1.871851921081543, "learning_rate": 9.968030467087161e-06, "loss": 0.9065, "step": 1839 }, { "epoch": 0.06499211882254767, "grad_norm": 1.8944346904754639, "learning_rate": 9.967965853341589e-06, "loss": 0.9498, "step": 1840 }, { "epoch": 0.06502744062625558, "grad_norm": 1.6591624021530151, "learning_rate": 9.967901174576214e-06, "loss": 0.9135, "step": 1841 }, { "epoch": 0.06506276242996349, "grad_norm": 2.0016560554504395, "learning_rate": 9.967836430791883e-06, "loss": 0.9069, "step": 1842 }, { "epoch": 0.06509808423367139, "grad_norm": 1.797841191291809, "learning_rate": 9.967771621989443e-06, "loss": 0.9266, "step": 1843 }, { "epoch": 0.0651334060373793, "grad_norm": 1.9479269981384277, "learning_rate": 9.967706748169741e-06, "loss": 0.9624, "step": 1844 }, { "epoch": 0.06516872784108721, "grad_norm": 1.9017081260681152, "learning_rate": 9.967641809333626e-06, "loss": 0.9539, "step": 1845 }, { "epoch": 0.0652040496447951, "grad_norm": 1.831391453742981, "learning_rate": 9.967576805481951e-06, "loss": 0.969, "step": 1846 }, { "epoch": 0.06523937144850302, "grad_norm": 2.040966749191284, "learning_rate": 9.967511736615563e-06, "loss": 0.9056, "step": 1847 }, { "epoch": 0.06527469325221093, "grad_norm": 1.9124470949172974, "learning_rate": 9.967446602735316e-06, "loss": 0.917, "step": 1848 }, { "epoch": 0.06531001505591882, "grad_norm": 2.1999094486236572, "learning_rate": 9.967381403842062e-06, "loss": 0.9106, "step": 1849 }, { "epoch": 0.06534533685962673, "grad_norm": 1.887043833732605, "learning_rate": 9.967316139936654e-06, "loss": 0.9201, "step": 1850 }, { "epoch": 0.06538065866333465, "grad_norm": 1.9496830701828003, "learning_rate": 9.967250811019944e-06, "loss": 0.9261, "step": 1851 }, { "epoch": 0.06541598046704256, "grad_norm": 1.8703999519348145, "learning_rate": 9.96718541709279e-06, "loss": 0.9198, "step": 1852 }, { "epoch": 0.06545130227075045, "grad_norm": 2.199476957321167, "learning_rate": 9.967119958156048e-06, "loss": 0.9094, "step": 1853 }, { "epoch": 0.06548662407445836, "grad_norm": 2.126270055770874, "learning_rate": 9.967054434210574e-06, "loss": 0.9227, "step": 1854 }, { "epoch": 0.06552194587816627, "grad_norm": 2.106389284133911, "learning_rate": 9.966988845257224e-06, "loss": 0.9257, "step": 1855 }, { "epoch": 0.06555726768187417, "grad_norm": 2.0592052936553955, "learning_rate": 9.966923191296857e-06, "loss": 0.9283, "step": 1856 }, { "epoch": 0.06559258948558208, "grad_norm": 1.8461530208587646, "learning_rate": 9.966857472330335e-06, "loss": 0.9311, "step": 1857 }, { "epoch": 0.06562791128928999, "grad_norm": 2.1819865703582764, "learning_rate": 9.966791688358516e-06, "loss": 0.9253, "step": 1858 }, { "epoch": 0.06566323309299789, "grad_norm": 1.89625883102417, "learning_rate": 9.96672583938226e-06, "loss": 0.9254, "step": 1859 }, { "epoch": 0.0656985548967058, "grad_norm": 1.9554048776626587, "learning_rate": 9.96665992540243e-06, "loss": 0.9294, "step": 1860 }, { "epoch": 0.06573387670041371, "grad_norm": 1.9109275341033936, "learning_rate": 9.966593946419888e-06, "loss": 0.941, "step": 1861 }, { "epoch": 0.0657691985041216, "grad_norm": 1.893707513809204, "learning_rate": 9.9665279024355e-06, "loss": 0.9499, "step": 1862 }, { "epoch": 0.06580452030782952, "grad_norm": 1.789811372756958, "learning_rate": 9.966461793450126e-06, "loss": 0.9271, "step": 1863 }, { "epoch": 0.06583984211153743, "grad_norm": 1.805010437965393, "learning_rate": 9.966395619464635e-06, "loss": 0.9292, "step": 1864 }, { "epoch": 0.06587516391524534, "grad_norm": 1.769046664237976, "learning_rate": 9.96632938047989e-06, "loss": 0.9368, "step": 1865 }, { "epoch": 0.06591048571895324, "grad_norm": 2.056861400604248, "learning_rate": 9.966263076496762e-06, "loss": 0.9293, "step": 1866 }, { "epoch": 0.06594580752266115, "grad_norm": 1.839254379272461, "learning_rate": 9.966196707516116e-06, "loss": 0.9032, "step": 1867 }, { "epoch": 0.06598112932636906, "grad_norm": 1.8543933629989624, "learning_rate": 9.96613027353882e-06, "loss": 0.9149, "step": 1868 }, { "epoch": 0.06601645113007695, "grad_norm": 1.9196836948394775, "learning_rate": 9.966063774565745e-06, "loss": 0.9426, "step": 1869 }, { "epoch": 0.06605177293378486, "grad_norm": 1.874422311782837, "learning_rate": 9.96599721059776e-06, "loss": 0.9234, "step": 1870 }, { "epoch": 0.06608709473749277, "grad_norm": 1.8083980083465576, "learning_rate": 9.965930581635738e-06, "loss": 0.9363, "step": 1871 }, { "epoch": 0.06612241654120067, "grad_norm": 1.662324070930481, "learning_rate": 9.965863887680552e-06, "loss": 0.9009, "step": 1872 }, { "epoch": 0.06615773834490858, "grad_norm": 1.8814376592636108, "learning_rate": 9.96579712873307e-06, "loss": 0.9083, "step": 1873 }, { "epoch": 0.06619306014861649, "grad_norm": 1.801724910736084, "learning_rate": 9.96573030479417e-06, "loss": 0.9254, "step": 1874 }, { "epoch": 0.06622838195232439, "grad_norm": 1.9166368246078491, "learning_rate": 9.965663415864724e-06, "loss": 0.9272, "step": 1875 }, { "epoch": 0.0662637037560323, "grad_norm": 1.8936601877212524, "learning_rate": 9.965596461945609e-06, "loss": 0.9117, "step": 1876 }, { "epoch": 0.06629902555974021, "grad_norm": 1.9865227937698364, "learning_rate": 9.965529443037702e-06, "loss": 0.8962, "step": 1877 }, { "epoch": 0.06633434736344812, "grad_norm": 1.7068796157836914, "learning_rate": 9.965462359141877e-06, "loss": 0.9097, "step": 1878 }, { "epoch": 0.06636966916715602, "grad_norm": 1.8340352773666382, "learning_rate": 9.965395210259016e-06, "loss": 0.9242, "step": 1879 }, { "epoch": 0.06640499097086393, "grad_norm": 1.8853563070297241, "learning_rate": 9.965327996389996e-06, "loss": 0.9441, "step": 1880 }, { "epoch": 0.06644031277457184, "grad_norm": 1.8280646800994873, "learning_rate": 9.965260717535696e-06, "loss": 0.9046, "step": 1881 }, { "epoch": 0.06647563457827974, "grad_norm": 2.053900718688965, "learning_rate": 9.965193373696996e-06, "loss": 0.9281, "step": 1882 }, { "epoch": 0.06651095638198765, "grad_norm": 1.9019651412963867, "learning_rate": 9.96512596487478e-06, "loss": 0.9162, "step": 1883 }, { "epoch": 0.06654627818569556, "grad_norm": 1.717076301574707, "learning_rate": 9.965058491069928e-06, "loss": 0.8943, "step": 1884 }, { "epoch": 0.06658159998940345, "grad_norm": 1.7183116674423218, "learning_rate": 9.964990952283325e-06, "loss": 0.9404, "step": 1885 }, { "epoch": 0.06661692179311136, "grad_norm": 1.9659514427185059, "learning_rate": 9.964923348515851e-06, "loss": 0.9086, "step": 1886 }, { "epoch": 0.06665224359681927, "grad_norm": 1.7858270406723022, "learning_rate": 9.964855679768394e-06, "loss": 0.9456, "step": 1887 }, { "epoch": 0.06668756540052717, "grad_norm": 1.8253282308578491, "learning_rate": 9.964787946041842e-06, "loss": 0.9237, "step": 1888 }, { "epoch": 0.06672288720423508, "grad_norm": 1.706479787826538, "learning_rate": 9.964720147337078e-06, "loss": 0.9446, "step": 1889 }, { "epoch": 0.06675820900794299, "grad_norm": 2.0538575649261475, "learning_rate": 9.964652283654988e-06, "loss": 0.8942, "step": 1890 }, { "epoch": 0.0667935308116509, "grad_norm": 1.907974362373352, "learning_rate": 9.964584354996462e-06, "loss": 0.9536, "step": 1891 }, { "epoch": 0.0668288526153588, "grad_norm": 1.774581789970398, "learning_rate": 9.96451636136239e-06, "loss": 0.9436, "step": 1892 }, { "epoch": 0.06686417441906671, "grad_norm": 2.0571341514587402, "learning_rate": 9.964448302753662e-06, "loss": 0.9185, "step": 1893 }, { "epoch": 0.06689949622277462, "grad_norm": 2.091369867324829, "learning_rate": 9.964380179171168e-06, "loss": 0.932, "step": 1894 }, { "epoch": 0.06693481802648252, "grad_norm": 1.7900437116622925, "learning_rate": 9.964311990615798e-06, "loss": 0.8914, "step": 1895 }, { "epoch": 0.06697013983019043, "grad_norm": 1.9227144718170166, "learning_rate": 9.964243737088446e-06, "loss": 0.9503, "step": 1896 }, { "epoch": 0.06700546163389834, "grad_norm": 2.311760663986206, "learning_rate": 9.964175418590008e-06, "loss": 0.9392, "step": 1897 }, { "epoch": 0.06704078343760624, "grad_norm": 1.7392736673355103, "learning_rate": 9.964107035121372e-06, "loss": 0.9385, "step": 1898 }, { "epoch": 0.06707610524131415, "grad_norm": 1.9486005306243896, "learning_rate": 9.964038586683438e-06, "loss": 0.9151, "step": 1899 }, { "epoch": 0.06711142704502206, "grad_norm": 1.8462302684783936, "learning_rate": 9.963970073277097e-06, "loss": 0.9294, "step": 1900 }, { "epoch": 0.06714674884872995, "grad_norm": 1.7544125318527222, "learning_rate": 9.963901494903252e-06, "loss": 0.9164, "step": 1901 }, { "epoch": 0.06718207065243786, "grad_norm": 1.8091132640838623, "learning_rate": 9.963832851562797e-06, "loss": 0.9629, "step": 1902 }, { "epoch": 0.06721739245614577, "grad_norm": 1.8150992393493652, "learning_rate": 9.96376414325663e-06, "loss": 0.8868, "step": 1903 }, { "epoch": 0.06725271425985369, "grad_norm": 2.5542314052581787, "learning_rate": 9.96369536998565e-06, "loss": 0.9604, "step": 1904 }, { "epoch": 0.06728803606356158, "grad_norm": 1.8387550115585327, "learning_rate": 9.96362653175076e-06, "loss": 0.913, "step": 1905 }, { "epoch": 0.06732335786726949, "grad_norm": 1.791494607925415, "learning_rate": 9.963557628552858e-06, "loss": 0.9302, "step": 1906 }, { "epoch": 0.0673586796709774, "grad_norm": 1.7813880443572998, "learning_rate": 9.963488660392848e-06, "loss": 0.8881, "step": 1907 }, { "epoch": 0.0673940014746853, "grad_norm": 1.7980270385742188, "learning_rate": 9.96341962727163e-06, "loss": 0.9378, "step": 1908 }, { "epoch": 0.06742932327839321, "grad_norm": 2.024461030960083, "learning_rate": 9.96335052919011e-06, "loss": 0.9368, "step": 1909 }, { "epoch": 0.06746464508210112, "grad_norm": 2.0051486492156982, "learning_rate": 9.963281366149192e-06, "loss": 0.9336, "step": 1910 }, { "epoch": 0.06749996688580902, "grad_norm": 1.8724868297576904, "learning_rate": 9.963212138149779e-06, "loss": 0.9304, "step": 1911 }, { "epoch": 0.06753528868951693, "grad_norm": 2.068606376647949, "learning_rate": 9.963142845192778e-06, "loss": 0.9182, "step": 1912 }, { "epoch": 0.06757061049322484, "grad_norm": 2.5321121215820312, "learning_rate": 9.963073487279097e-06, "loss": 0.9171, "step": 1913 }, { "epoch": 0.06760593229693275, "grad_norm": 1.8331763744354248, "learning_rate": 9.963004064409643e-06, "loss": 0.903, "step": 1914 }, { "epoch": 0.06764125410064065, "grad_norm": 1.8548150062561035, "learning_rate": 9.962934576585326e-06, "loss": 0.9198, "step": 1915 }, { "epoch": 0.06767657590434856, "grad_norm": 2.1972270011901855, "learning_rate": 9.962865023807052e-06, "loss": 0.9523, "step": 1916 }, { "epoch": 0.06771189770805647, "grad_norm": 1.9379087686538696, "learning_rate": 9.962795406075735e-06, "loss": 0.8728, "step": 1917 }, { "epoch": 0.06774721951176436, "grad_norm": 1.7968453168869019, "learning_rate": 9.962725723392285e-06, "loss": 0.886, "step": 1918 }, { "epoch": 0.06778254131547228, "grad_norm": 1.7178682088851929, "learning_rate": 9.962655975757612e-06, "loss": 0.9143, "step": 1919 }, { "epoch": 0.06781786311918019, "grad_norm": 1.748528242111206, "learning_rate": 9.962586163172632e-06, "loss": 0.9037, "step": 1920 }, { "epoch": 0.06785318492288808, "grad_norm": 2.0347955226898193, "learning_rate": 9.962516285638257e-06, "loss": 0.9193, "step": 1921 }, { "epoch": 0.06788850672659599, "grad_norm": 1.8816754817962646, "learning_rate": 9.9624463431554e-06, "loss": 0.8925, "step": 1922 }, { "epoch": 0.0679238285303039, "grad_norm": 2.1808900833129883, "learning_rate": 9.96237633572498e-06, "loss": 0.9117, "step": 1923 }, { "epoch": 0.0679591503340118, "grad_norm": 1.9402596950531006, "learning_rate": 9.962306263347911e-06, "loss": 0.9018, "step": 1924 }, { "epoch": 0.06799447213771971, "grad_norm": 1.9135358333587646, "learning_rate": 9.96223612602511e-06, "loss": 0.9228, "step": 1925 }, { "epoch": 0.06802979394142762, "grad_norm": 1.7015300989151, "learning_rate": 9.962165923757497e-06, "loss": 0.9399, "step": 1926 }, { "epoch": 0.06806511574513553, "grad_norm": 1.8426177501678467, "learning_rate": 9.962095656545987e-06, "loss": 0.8867, "step": 1927 }, { "epoch": 0.06810043754884343, "grad_norm": 2.226886510848999, "learning_rate": 9.962025324391502e-06, "loss": 0.9116, "step": 1928 }, { "epoch": 0.06813575935255134, "grad_norm": 1.8811752796173096, "learning_rate": 9.961954927294963e-06, "loss": 0.9111, "step": 1929 }, { "epoch": 0.06817108115625925, "grad_norm": 1.7583421468734741, "learning_rate": 9.96188446525729e-06, "loss": 0.9081, "step": 1930 }, { "epoch": 0.06820640295996715, "grad_norm": 1.7635552883148193, "learning_rate": 9.961813938279408e-06, "loss": 0.9445, "step": 1931 }, { "epoch": 0.06824172476367506, "grad_norm": 1.9831424951553345, "learning_rate": 9.961743346362236e-06, "loss": 0.9629, "step": 1932 }, { "epoch": 0.06827704656738297, "grad_norm": 1.9982101917266846, "learning_rate": 9.9616726895067e-06, "loss": 0.9134, "step": 1933 }, { "epoch": 0.06831236837109086, "grad_norm": 1.77986741065979, "learning_rate": 9.961601967713723e-06, "loss": 0.9274, "step": 1934 }, { "epoch": 0.06834769017479878, "grad_norm": 1.765406608581543, "learning_rate": 9.961531180984235e-06, "loss": 0.8942, "step": 1935 }, { "epoch": 0.06838301197850669, "grad_norm": 1.8279520273208618, "learning_rate": 9.961460329319158e-06, "loss": 0.8961, "step": 1936 }, { "epoch": 0.06841833378221458, "grad_norm": 1.995525598526001, "learning_rate": 9.961389412719422e-06, "loss": 0.9611, "step": 1937 }, { "epoch": 0.0684536555859225, "grad_norm": 1.6875032186508179, "learning_rate": 9.961318431185952e-06, "loss": 0.9176, "step": 1938 }, { "epoch": 0.0684889773896304, "grad_norm": 1.8020578622817993, "learning_rate": 9.961247384719682e-06, "loss": 0.9333, "step": 1939 }, { "epoch": 0.06852429919333831, "grad_norm": 2.084315061569214, "learning_rate": 9.961176273321535e-06, "loss": 0.945, "step": 1940 }, { "epoch": 0.06855962099704621, "grad_norm": 1.9288804531097412, "learning_rate": 9.961105096992447e-06, "loss": 0.9166, "step": 1941 }, { "epoch": 0.06859494280075412, "grad_norm": 1.9428534507751465, "learning_rate": 9.96103385573335e-06, "loss": 0.923, "step": 1942 }, { "epoch": 0.06863026460446203, "grad_norm": 2.2491350173950195, "learning_rate": 9.960962549545172e-06, "loss": 0.911, "step": 1943 }, { "epoch": 0.06866558640816993, "grad_norm": 1.8866171836853027, "learning_rate": 9.960891178428851e-06, "loss": 0.887, "step": 1944 }, { "epoch": 0.06870090821187784, "grad_norm": 1.7522072792053223, "learning_rate": 9.960819742385316e-06, "loss": 0.8806, "step": 1945 }, { "epoch": 0.06873623001558575, "grad_norm": 1.7397558689117432, "learning_rate": 9.960748241415507e-06, "loss": 0.9209, "step": 1946 }, { "epoch": 0.06877155181929365, "grad_norm": 1.9102354049682617, "learning_rate": 9.960676675520358e-06, "loss": 0.926, "step": 1947 }, { "epoch": 0.06880687362300156, "grad_norm": 1.8073368072509766, "learning_rate": 9.960605044700805e-06, "loss": 0.9178, "step": 1948 }, { "epoch": 0.06884219542670947, "grad_norm": 1.859857439994812, "learning_rate": 9.960533348957783e-06, "loss": 0.9528, "step": 1949 }, { "epoch": 0.06887751723041736, "grad_norm": 1.717539668083191, "learning_rate": 9.960461588292235e-06, "loss": 0.9117, "step": 1950 }, { "epoch": 0.06891283903412528, "grad_norm": 1.5780386924743652, "learning_rate": 9.960389762705098e-06, "loss": 0.6311, "step": 1951 }, { "epoch": 0.06894816083783319, "grad_norm": 2.45353627204895, "learning_rate": 9.960317872197312e-06, "loss": 0.9059, "step": 1952 }, { "epoch": 0.0689834826415411, "grad_norm": 1.9246467351913452, "learning_rate": 9.960245916769817e-06, "loss": 0.8899, "step": 1953 }, { "epoch": 0.069018804445249, "grad_norm": 1.899958848953247, "learning_rate": 9.960173896423558e-06, "loss": 0.9047, "step": 1954 }, { "epoch": 0.0690541262489569, "grad_norm": 1.9043352603912354, "learning_rate": 9.960101811159475e-06, "loss": 0.9597, "step": 1955 }, { "epoch": 0.06908944805266481, "grad_norm": 1.8295434713363647, "learning_rate": 9.960029660978512e-06, "loss": 0.8879, "step": 1956 }, { "epoch": 0.06912476985637271, "grad_norm": 1.977940320968628, "learning_rate": 9.959957445881611e-06, "loss": 0.8915, "step": 1957 }, { "epoch": 0.06916009166008062, "grad_norm": 2.05295991897583, "learning_rate": 9.959885165869722e-06, "loss": 0.939, "step": 1958 }, { "epoch": 0.06919541346378853, "grad_norm": 2.103163242340088, "learning_rate": 9.959812820943787e-06, "loss": 0.8973, "step": 1959 }, { "epoch": 0.06923073526749643, "grad_norm": 1.969767689704895, "learning_rate": 9.959740411104754e-06, "loss": 0.9515, "step": 1960 }, { "epoch": 0.06926605707120434, "grad_norm": 1.9623253345489502, "learning_rate": 9.959667936353571e-06, "loss": 0.9044, "step": 1961 }, { "epoch": 0.06930137887491225, "grad_norm": 1.9468005895614624, "learning_rate": 9.95959539669119e-06, "loss": 0.898, "step": 1962 }, { "epoch": 0.06933670067862015, "grad_norm": 1.7835432291030884, "learning_rate": 9.959522792118551e-06, "loss": 0.9071, "step": 1963 }, { "epoch": 0.06937202248232806, "grad_norm": 1.8182812929153442, "learning_rate": 9.959450122636613e-06, "loss": 0.8968, "step": 1964 }, { "epoch": 0.06940734428603597, "grad_norm": 2.186326265335083, "learning_rate": 9.959377388246324e-06, "loss": 0.8858, "step": 1965 }, { "epoch": 0.06944266608974388, "grad_norm": 1.9144517183303833, "learning_rate": 9.959304588948634e-06, "loss": 0.8954, "step": 1966 }, { "epoch": 0.06947798789345178, "grad_norm": 1.7728067636489868, "learning_rate": 9.959231724744501e-06, "loss": 0.895, "step": 1967 }, { "epoch": 0.06951330969715969, "grad_norm": 1.9745348691940308, "learning_rate": 9.959158795634872e-06, "loss": 0.8875, "step": 1968 }, { "epoch": 0.0695486315008676, "grad_norm": 1.9547154903411865, "learning_rate": 9.959085801620707e-06, "loss": 0.9346, "step": 1969 }, { "epoch": 0.0695839533045755, "grad_norm": 1.7152971029281616, "learning_rate": 9.959012742702957e-06, "loss": 0.8868, "step": 1970 }, { "epoch": 0.0696192751082834, "grad_norm": 2.0225698947906494, "learning_rate": 9.958939618882584e-06, "loss": 0.9397, "step": 1971 }, { "epoch": 0.06965459691199131, "grad_norm": 2.758809804916382, "learning_rate": 9.95886643016054e-06, "loss": 0.9152, "step": 1972 }, { "epoch": 0.06968991871569921, "grad_norm": 1.9769755601882935, "learning_rate": 9.958793176537785e-06, "loss": 0.9373, "step": 1973 }, { "epoch": 0.06972524051940712, "grad_norm": 1.8560196161270142, "learning_rate": 9.958719858015274e-06, "loss": 0.9008, "step": 1974 }, { "epoch": 0.06976056232311503, "grad_norm": 1.9424693584442139, "learning_rate": 9.958646474593972e-06, "loss": 0.943, "step": 1975 }, { "epoch": 0.06979588412682293, "grad_norm": 1.9248439073562622, "learning_rate": 9.958573026274838e-06, "loss": 0.8825, "step": 1976 }, { "epoch": 0.06983120593053084, "grad_norm": 1.6453070640563965, "learning_rate": 9.95849951305883e-06, "loss": 0.8936, "step": 1977 }, { "epoch": 0.06986652773423875, "grad_norm": 1.6411972045898438, "learning_rate": 9.958425934946913e-06, "loss": 0.8725, "step": 1978 }, { "epoch": 0.06990184953794666, "grad_norm": 1.9951491355895996, "learning_rate": 9.958352291940049e-06, "loss": 0.9595, "step": 1979 }, { "epoch": 0.06993717134165456, "grad_norm": 1.7017143964767456, "learning_rate": 9.958278584039204e-06, "loss": 0.86, "step": 1980 }, { "epoch": 0.06997249314536247, "grad_norm": 2.1792004108428955, "learning_rate": 9.958204811245341e-06, "loss": 0.933, "step": 1981 }, { "epoch": 0.07000781494907038, "grad_norm": 1.8229495286941528, "learning_rate": 9.958130973559426e-06, "loss": 0.8935, "step": 1982 }, { "epoch": 0.07004313675277828, "grad_norm": 1.297348141670227, "learning_rate": 9.958057070982423e-06, "loss": 0.5865, "step": 1983 }, { "epoch": 0.07007845855648619, "grad_norm": 2.136704683303833, "learning_rate": 9.9579831035153e-06, "loss": 0.9074, "step": 1984 }, { "epoch": 0.0701137803601941, "grad_norm": 1.9317816495895386, "learning_rate": 9.957909071159029e-06, "loss": 0.8982, "step": 1985 }, { "epoch": 0.070149102163902, "grad_norm": 1.9010218381881714, "learning_rate": 9.957834973914576e-06, "loss": 0.9113, "step": 1986 }, { "epoch": 0.0701844239676099, "grad_norm": 1.745954990386963, "learning_rate": 9.957760811782907e-06, "loss": 0.907, "step": 1987 }, { "epoch": 0.07021974577131782, "grad_norm": 1.752945899963379, "learning_rate": 9.957686584765e-06, "loss": 0.8972, "step": 1988 }, { "epoch": 0.07025506757502571, "grad_norm": 1.9481172561645508, "learning_rate": 9.957612292861822e-06, "loss": 0.9246, "step": 1989 }, { "epoch": 0.07029038937873362, "grad_norm": 1.9452359676361084, "learning_rate": 9.957537936074347e-06, "loss": 0.8726, "step": 1990 }, { "epoch": 0.07032571118244153, "grad_norm": 2.159686803817749, "learning_rate": 9.957463514403546e-06, "loss": 0.9365, "step": 1991 }, { "epoch": 0.07036103298614944, "grad_norm": 2.2195005416870117, "learning_rate": 9.957389027850395e-06, "loss": 0.9062, "step": 1992 }, { "epoch": 0.07039635478985734, "grad_norm": 1.8280171155929565, "learning_rate": 9.957314476415869e-06, "loss": 0.8876, "step": 1993 }, { "epoch": 0.07043167659356525, "grad_norm": 1.9725923538208008, "learning_rate": 9.957239860100943e-06, "loss": 0.928, "step": 1994 }, { "epoch": 0.07046699839727316, "grad_norm": 1.910563349723816, "learning_rate": 9.957165178906592e-06, "loss": 0.9374, "step": 1995 }, { "epoch": 0.07050232020098106, "grad_norm": 1.7896578311920166, "learning_rate": 9.957090432833796e-06, "loss": 0.8967, "step": 1996 }, { "epoch": 0.07053764200468897, "grad_norm": 1.8785933256149292, "learning_rate": 9.957015621883532e-06, "loss": 0.9455, "step": 1997 }, { "epoch": 0.07057296380839688, "grad_norm": 1.912709355354309, "learning_rate": 9.95694074605678e-06, "loss": 0.9211, "step": 1998 }, { "epoch": 0.07060828561210478, "grad_norm": 2.0098159313201904, "learning_rate": 9.956865805354519e-06, "loss": 0.9436, "step": 1999 }, { "epoch": 0.07064360741581269, "grad_norm": 1.867157220840454, "learning_rate": 9.95679079977773e-06, "loss": 0.8941, "step": 2000 }, { "epoch": 0.0706789292195206, "grad_norm": 1.8774669170379639, "learning_rate": 9.956715729327394e-06, "loss": 0.9315, "step": 2001 }, { "epoch": 0.0707142510232285, "grad_norm": 1.926061749458313, "learning_rate": 9.956640594004495e-06, "loss": 0.8986, "step": 2002 }, { "epoch": 0.0707495728269364, "grad_norm": 1.8351675271987915, "learning_rate": 9.956565393810017e-06, "loss": 0.8912, "step": 2003 }, { "epoch": 0.07078489463064432, "grad_norm": 1.9649018049240112, "learning_rate": 9.95649012874494e-06, "loss": 0.904, "step": 2004 }, { "epoch": 0.07082021643435223, "grad_norm": 1.9405092000961304, "learning_rate": 9.956414798810254e-06, "loss": 0.9396, "step": 2005 }, { "epoch": 0.07085553823806012, "grad_norm": 2.1526262760162354, "learning_rate": 9.956339404006943e-06, "loss": 0.9083, "step": 2006 }, { "epoch": 0.07089086004176803, "grad_norm": 1.9561856985092163, "learning_rate": 9.956263944335993e-06, "loss": 0.9284, "step": 2007 }, { "epoch": 0.07092618184547594, "grad_norm": 1.8221102952957153, "learning_rate": 9.956188419798392e-06, "loss": 0.9324, "step": 2008 }, { "epoch": 0.07096150364918384, "grad_norm": 1.8915446996688843, "learning_rate": 9.956112830395129e-06, "loss": 0.9649, "step": 2009 }, { "epoch": 0.07099682545289175, "grad_norm": 1.872920274734497, "learning_rate": 9.956037176127192e-06, "loss": 0.9296, "step": 2010 }, { "epoch": 0.07103214725659966, "grad_norm": 1.9542990922927856, "learning_rate": 9.955961456995572e-06, "loss": 0.9157, "step": 2011 }, { "epoch": 0.07106746906030756, "grad_norm": 2.0375144481658936, "learning_rate": 9.955885673001261e-06, "loss": 0.9143, "step": 2012 }, { "epoch": 0.07110279086401547, "grad_norm": 1.8390096426010132, "learning_rate": 9.95580982414525e-06, "loss": 0.8974, "step": 2013 }, { "epoch": 0.07113811266772338, "grad_norm": 1.9032460451126099, "learning_rate": 9.95573391042853e-06, "loss": 0.8982, "step": 2014 }, { "epoch": 0.07117343447143129, "grad_norm": 1.8259323835372925, "learning_rate": 9.955657931852098e-06, "loss": 0.8997, "step": 2015 }, { "epoch": 0.07120875627513919, "grad_norm": 2.2856040000915527, "learning_rate": 9.955581888416946e-06, "loss": 0.9354, "step": 2016 }, { "epoch": 0.0712440780788471, "grad_norm": 1.993506669998169, "learning_rate": 9.95550578012407e-06, "loss": 0.918, "step": 2017 }, { "epoch": 0.07127939988255501, "grad_norm": 1.7949570417404175, "learning_rate": 9.955429606974464e-06, "loss": 0.9009, "step": 2018 }, { "epoch": 0.0713147216862629, "grad_norm": 1.840005874633789, "learning_rate": 9.95535336896913e-06, "loss": 0.9311, "step": 2019 }, { "epoch": 0.07135004348997082, "grad_norm": 1.9397858381271362, "learning_rate": 9.955277066109061e-06, "loss": 0.9329, "step": 2020 }, { "epoch": 0.07138536529367873, "grad_norm": 1.9958752393722534, "learning_rate": 9.955200698395258e-06, "loss": 0.9106, "step": 2021 }, { "epoch": 0.07142068709738662, "grad_norm": 1.7184629440307617, "learning_rate": 9.955124265828718e-06, "loss": 0.8702, "step": 2022 }, { "epoch": 0.07145600890109453, "grad_norm": 1.881285309791565, "learning_rate": 9.955047768410443e-06, "loss": 0.9518, "step": 2023 }, { "epoch": 0.07149133070480244, "grad_norm": 1.8003829717636108, "learning_rate": 9.954971206141437e-06, "loss": 0.8937, "step": 2024 }, { "epoch": 0.07152665250851034, "grad_norm": 1.8373106718063354, "learning_rate": 9.954894579022699e-06, "loss": 0.931, "step": 2025 }, { "epoch": 0.07156197431221825, "grad_norm": 1.9624249935150146, "learning_rate": 9.95481788705523e-06, "loss": 0.879, "step": 2026 }, { "epoch": 0.07159729611592616, "grad_norm": 1.9597351551055908, "learning_rate": 9.954741130240037e-06, "loss": 0.9446, "step": 2027 }, { "epoch": 0.07163261791963407, "grad_norm": 2.042196273803711, "learning_rate": 9.954664308578122e-06, "loss": 0.9192, "step": 2028 }, { "epoch": 0.07166793972334197, "grad_norm": 1.8418796062469482, "learning_rate": 9.954587422070494e-06, "loss": 0.9227, "step": 2029 }, { "epoch": 0.07170326152704988, "grad_norm": 1.733757734298706, "learning_rate": 9.954510470718155e-06, "loss": 0.8818, "step": 2030 }, { "epoch": 0.07173858333075779, "grad_norm": 1.7910902500152588, "learning_rate": 9.954433454522117e-06, "loss": 0.9316, "step": 2031 }, { "epoch": 0.07177390513446569, "grad_norm": 2.248375654220581, "learning_rate": 9.954356373483383e-06, "loss": 0.913, "step": 2032 }, { "epoch": 0.0718092269381736, "grad_norm": 1.7920725345611572, "learning_rate": 9.954279227602965e-06, "loss": 0.8635, "step": 2033 }, { "epoch": 0.07184454874188151, "grad_norm": 1.8940331935882568, "learning_rate": 9.954202016881872e-06, "loss": 0.8807, "step": 2034 }, { "epoch": 0.0718798705455894, "grad_norm": 1.9591389894485474, "learning_rate": 9.954124741321114e-06, "loss": 0.9139, "step": 2035 }, { "epoch": 0.07191519234929732, "grad_norm": 1.7083842754364014, "learning_rate": 9.954047400921702e-06, "loss": 0.9176, "step": 2036 }, { "epoch": 0.07195051415300523, "grad_norm": 2.1086246967315674, "learning_rate": 9.95396999568465e-06, "loss": 0.9286, "step": 2037 }, { "epoch": 0.07198583595671312, "grad_norm": 1.903597116470337, "learning_rate": 9.95389252561097e-06, "loss": 0.941, "step": 2038 }, { "epoch": 0.07202115776042103, "grad_norm": 1.8411740064620972, "learning_rate": 9.953814990701676e-06, "loss": 0.8934, "step": 2039 }, { "epoch": 0.07205647956412894, "grad_norm": 1.9636423587799072, "learning_rate": 9.953737390957781e-06, "loss": 0.9211, "step": 2040 }, { "epoch": 0.07209180136783686, "grad_norm": 1.9894964694976807, "learning_rate": 9.953659726380304e-06, "loss": 0.9407, "step": 2041 }, { "epoch": 0.07212712317154475, "grad_norm": 1.9982446432113647, "learning_rate": 9.953581996970258e-06, "loss": 0.871, "step": 2042 }, { "epoch": 0.07216244497525266, "grad_norm": 1.8349781036376953, "learning_rate": 9.953504202728664e-06, "loss": 0.9094, "step": 2043 }, { "epoch": 0.07219776677896057, "grad_norm": 1.9450747966766357, "learning_rate": 9.953426343656537e-06, "loss": 0.9409, "step": 2044 }, { "epoch": 0.07223308858266847, "grad_norm": 1.9480677843093872, "learning_rate": 9.953348419754897e-06, "loss": 0.9022, "step": 2045 }, { "epoch": 0.07226841038637638, "grad_norm": 2.2484638690948486, "learning_rate": 9.953270431024765e-06, "loss": 0.9341, "step": 2046 }, { "epoch": 0.07230373219008429, "grad_norm": 1.800344467163086, "learning_rate": 9.95319237746716e-06, "loss": 0.9193, "step": 2047 }, { "epoch": 0.07233905399379219, "grad_norm": 1.8634681701660156, "learning_rate": 9.953114259083104e-06, "loss": 0.9364, "step": 2048 }, { "epoch": 0.0723743757975001, "grad_norm": 1.947556495666504, "learning_rate": 9.95303607587362e-06, "loss": 0.9049, "step": 2049 }, { "epoch": 0.07240969760120801, "grad_norm": 1.8537380695343018, "learning_rate": 9.95295782783973e-06, "loss": 0.8939, "step": 2050 }, { "epoch": 0.0724450194049159, "grad_norm": 1.719569444656372, "learning_rate": 9.95287951498246e-06, "loss": 0.9005, "step": 2051 }, { "epoch": 0.07248034120862382, "grad_norm": 1.9125083684921265, "learning_rate": 9.952801137302833e-06, "loss": 0.9033, "step": 2052 }, { "epoch": 0.07251566301233173, "grad_norm": 1.8486703634262085, "learning_rate": 9.952722694801878e-06, "loss": 0.8862, "step": 2053 }, { "epoch": 0.07255098481603964, "grad_norm": 1.9099618196487427, "learning_rate": 9.952644187480616e-06, "loss": 0.8883, "step": 2054 }, { "epoch": 0.07258630661974753, "grad_norm": 1.7923794984817505, "learning_rate": 9.95256561534008e-06, "loss": 0.8748, "step": 2055 }, { "epoch": 0.07262162842345544, "grad_norm": 1.9237594604492188, "learning_rate": 9.952486978381296e-06, "loss": 0.9155, "step": 2056 }, { "epoch": 0.07265695022716336, "grad_norm": 1.7852754592895508, "learning_rate": 9.952408276605294e-06, "loss": 0.9148, "step": 2057 }, { "epoch": 0.07269227203087125, "grad_norm": 2.2135770320892334, "learning_rate": 9.952329510013102e-06, "loss": 0.9413, "step": 2058 }, { "epoch": 0.07272759383457916, "grad_norm": 1.9526904821395874, "learning_rate": 9.952250678605753e-06, "loss": 0.9335, "step": 2059 }, { "epoch": 0.07276291563828707, "grad_norm": 1.755257248878479, "learning_rate": 9.952171782384279e-06, "loss": 0.9198, "step": 2060 }, { "epoch": 0.07279823744199497, "grad_norm": 1.9014692306518555, "learning_rate": 9.95209282134971e-06, "loss": 0.8476, "step": 2061 }, { "epoch": 0.07283355924570288, "grad_norm": 1.624693751335144, "learning_rate": 9.95201379550308e-06, "loss": 0.9048, "step": 2062 }, { "epoch": 0.07286888104941079, "grad_norm": 2.386934995651245, "learning_rate": 9.951934704845428e-06, "loss": 0.9476, "step": 2063 }, { "epoch": 0.07290420285311869, "grad_norm": 2.005467176437378, "learning_rate": 9.951855549377784e-06, "loss": 0.9121, "step": 2064 }, { "epoch": 0.0729395246568266, "grad_norm": 2.0552399158477783, "learning_rate": 9.951776329101183e-06, "loss": 0.8828, "step": 2065 }, { "epoch": 0.07297484646053451, "grad_norm": 1.82048499584198, "learning_rate": 9.951697044016669e-06, "loss": 0.9296, "step": 2066 }, { "epoch": 0.07301016826424242, "grad_norm": 1.881424903869629, "learning_rate": 9.95161769412527e-06, "loss": 0.9532, "step": 2067 }, { "epoch": 0.07304549006795032, "grad_norm": 1.650208830833435, "learning_rate": 9.951538279428033e-06, "loss": 0.8927, "step": 2068 }, { "epoch": 0.07308081187165823, "grad_norm": 2.1928327083587646, "learning_rate": 9.951458799925992e-06, "loss": 0.9033, "step": 2069 }, { "epoch": 0.07311613367536614, "grad_norm": 1.7424931526184082, "learning_rate": 9.951379255620188e-06, "loss": 0.9187, "step": 2070 }, { "epoch": 0.07315145547907403, "grad_norm": 1.9211039543151855, "learning_rate": 9.951299646511664e-06, "loss": 0.9057, "step": 2071 }, { "epoch": 0.07318677728278195, "grad_norm": 1.7436555624008179, "learning_rate": 9.951219972601463e-06, "loss": 0.8402, "step": 2072 }, { "epoch": 0.07322209908648986, "grad_norm": 1.74430251121521, "learning_rate": 9.951140233890623e-06, "loss": 0.9282, "step": 2073 }, { "epoch": 0.07325742089019775, "grad_norm": 2.061392068862915, "learning_rate": 9.951060430380191e-06, "loss": 0.9054, "step": 2074 }, { "epoch": 0.07329274269390566, "grad_norm": 1.776489496231079, "learning_rate": 9.950980562071211e-06, "loss": 0.9474, "step": 2075 }, { "epoch": 0.07332806449761357, "grad_norm": 1.8059957027435303, "learning_rate": 9.95090062896473e-06, "loss": 0.882, "step": 2076 }, { "epoch": 0.07336338630132147, "grad_norm": 1.9199661016464233, "learning_rate": 9.95082063106179e-06, "loss": 0.9026, "step": 2077 }, { "epoch": 0.07339870810502938, "grad_norm": 1.8702876567840576, "learning_rate": 9.95074056836344e-06, "loss": 0.904, "step": 2078 }, { "epoch": 0.07343402990873729, "grad_norm": 1.8432426452636719, "learning_rate": 9.95066044087073e-06, "loss": 0.9194, "step": 2079 }, { "epoch": 0.0734693517124452, "grad_norm": 1.962503433227539, "learning_rate": 9.950580248584705e-06, "loss": 0.9129, "step": 2080 }, { "epoch": 0.0735046735161531, "grad_norm": 1.8757960796356201, "learning_rate": 9.950499991506417e-06, "loss": 0.9146, "step": 2081 }, { "epoch": 0.07353999531986101, "grad_norm": 1.920292615890503, "learning_rate": 9.950419669636915e-06, "loss": 0.9157, "step": 2082 }, { "epoch": 0.07357531712356892, "grad_norm": 2.061042070388794, "learning_rate": 9.950339282977252e-06, "loss": 0.9099, "step": 2083 }, { "epoch": 0.07361063892727682, "grad_norm": 1.87960684299469, "learning_rate": 9.950258831528479e-06, "loss": 0.9772, "step": 2084 }, { "epoch": 0.07364596073098473, "grad_norm": 1.968140959739685, "learning_rate": 9.950178315291648e-06, "loss": 0.8997, "step": 2085 }, { "epoch": 0.07368128253469264, "grad_norm": 1.8338814973831177, "learning_rate": 9.950097734267813e-06, "loss": 0.9073, "step": 2086 }, { "epoch": 0.07371660433840053, "grad_norm": 1.7349790334701538, "learning_rate": 9.950017088458032e-06, "loss": 0.9082, "step": 2087 }, { "epoch": 0.07375192614210845, "grad_norm": 1.9830667972564697, "learning_rate": 9.949936377863356e-06, "loss": 0.8899, "step": 2088 }, { "epoch": 0.07378724794581636, "grad_norm": 1.7700927257537842, "learning_rate": 9.949855602484843e-06, "loss": 0.9236, "step": 2089 }, { "epoch": 0.07382256974952425, "grad_norm": 1.8778787851333618, "learning_rate": 9.949774762323552e-06, "loss": 0.8941, "step": 2090 }, { "epoch": 0.07385789155323216, "grad_norm": 1.7888436317443848, "learning_rate": 9.949693857380539e-06, "loss": 0.906, "step": 2091 }, { "epoch": 0.07389321335694007, "grad_norm": 1.9278278350830078, "learning_rate": 9.94961288765686e-06, "loss": 0.9085, "step": 2092 }, { "epoch": 0.07392853516064798, "grad_norm": 2.116604804992676, "learning_rate": 9.949531853153582e-06, "loss": 0.9109, "step": 2093 }, { "epoch": 0.07396385696435588, "grad_norm": 1.770573616027832, "learning_rate": 9.94945075387176e-06, "loss": 0.8996, "step": 2094 }, { "epoch": 0.07399917876806379, "grad_norm": 1.8215937614440918, "learning_rate": 9.949369589812457e-06, "loss": 0.8364, "step": 2095 }, { "epoch": 0.0740345005717717, "grad_norm": 1.8808791637420654, "learning_rate": 9.949288360976735e-06, "loss": 0.9044, "step": 2096 }, { "epoch": 0.0740698223754796, "grad_norm": 1.9009901285171509, "learning_rate": 9.949207067365659e-06, "loss": 0.8988, "step": 2097 }, { "epoch": 0.07410514417918751, "grad_norm": 1.8923625946044922, "learning_rate": 9.949125708980288e-06, "loss": 0.9052, "step": 2098 }, { "epoch": 0.07414046598289542, "grad_norm": 2.0736520290374756, "learning_rate": 9.949044285821691e-06, "loss": 0.9454, "step": 2099 }, { "epoch": 0.07417578778660332, "grad_norm": 2.032633066177368, "learning_rate": 9.948962797890935e-06, "loss": 0.9382, "step": 2100 }, { "epoch": 0.07421110959031123, "grad_norm": 1.9623178243637085, "learning_rate": 9.94888124518908e-06, "loss": 0.914, "step": 2101 }, { "epoch": 0.07424643139401914, "grad_norm": 2.002913475036621, "learning_rate": 9.948799627717202e-06, "loss": 0.8983, "step": 2102 }, { "epoch": 0.07428175319772704, "grad_norm": 1.7102872133255005, "learning_rate": 9.948717945476363e-06, "loss": 0.8896, "step": 2103 }, { "epoch": 0.07431707500143495, "grad_norm": 1.7204960584640503, "learning_rate": 9.948636198467634e-06, "loss": 0.9001, "step": 2104 }, { "epoch": 0.07435239680514286, "grad_norm": 1.6553338766098022, "learning_rate": 9.948554386692085e-06, "loss": 0.8981, "step": 2105 }, { "epoch": 0.07438771860885077, "grad_norm": 2.0350348949432373, "learning_rate": 9.948472510150786e-06, "loss": 0.9433, "step": 2106 }, { "epoch": 0.07442304041255866, "grad_norm": 2.071997880935669, "learning_rate": 9.948390568844809e-06, "loss": 0.9022, "step": 2107 }, { "epoch": 0.07445836221626657, "grad_norm": 1.8159712553024292, "learning_rate": 9.948308562775227e-06, "loss": 0.9383, "step": 2108 }, { "epoch": 0.07449368401997448, "grad_norm": 1.9187666177749634, "learning_rate": 9.948226491943112e-06, "loss": 0.9113, "step": 2109 }, { "epoch": 0.07452900582368238, "grad_norm": 2.1388487815856934, "learning_rate": 9.948144356349538e-06, "loss": 0.8935, "step": 2110 }, { "epoch": 0.07456432762739029, "grad_norm": 1.784528374671936, "learning_rate": 9.948062155995581e-06, "loss": 0.9191, "step": 2111 }, { "epoch": 0.0745996494310982, "grad_norm": 1.7588549852371216, "learning_rate": 9.947979890882318e-06, "loss": 0.9437, "step": 2112 }, { "epoch": 0.0746349712348061, "grad_norm": 1.718919277191162, "learning_rate": 9.947897561010823e-06, "loss": 0.8822, "step": 2113 }, { "epoch": 0.07467029303851401, "grad_norm": 1.9472349882125854, "learning_rate": 9.947815166382176e-06, "loss": 0.9121, "step": 2114 }, { "epoch": 0.07470561484222192, "grad_norm": 1.8624780178070068, "learning_rate": 9.947732706997453e-06, "loss": 0.8991, "step": 2115 }, { "epoch": 0.07474093664592983, "grad_norm": 1.8815239667892456, "learning_rate": 9.947650182857735e-06, "loss": 0.9141, "step": 2116 }, { "epoch": 0.07477625844963773, "grad_norm": 1.7669674158096313, "learning_rate": 9.947567593964101e-06, "loss": 0.8963, "step": 2117 }, { "epoch": 0.07481158025334564, "grad_norm": 1.8125264644622803, "learning_rate": 9.947484940317633e-06, "loss": 0.9079, "step": 2118 }, { "epoch": 0.07484690205705355, "grad_norm": 1.8636972904205322, "learning_rate": 9.947402221919413e-06, "loss": 0.9248, "step": 2119 }, { "epoch": 0.07488222386076145, "grad_norm": 1.833670973777771, "learning_rate": 9.947319438770521e-06, "loss": 0.9237, "step": 2120 }, { "epoch": 0.07491754566446936, "grad_norm": 1.8976166248321533, "learning_rate": 9.947236590872043e-06, "loss": 0.9412, "step": 2121 }, { "epoch": 0.07495286746817727, "grad_norm": 1.8336325883865356, "learning_rate": 9.947153678225063e-06, "loss": 0.8822, "step": 2122 }, { "epoch": 0.07498818927188516, "grad_norm": 2.0240564346313477, "learning_rate": 9.947070700830664e-06, "loss": 0.9219, "step": 2123 }, { "epoch": 0.07502351107559307, "grad_norm": 1.9504191875457764, "learning_rate": 9.946987658689934e-06, "loss": 0.89, "step": 2124 }, { "epoch": 0.07505883287930099, "grad_norm": 1.84110426902771, "learning_rate": 9.946904551803961e-06, "loss": 0.9229, "step": 2125 }, { "epoch": 0.07509415468300888, "grad_norm": 1.7105895280838013, "learning_rate": 9.94682138017383e-06, "loss": 0.8732, "step": 2126 }, { "epoch": 0.07512947648671679, "grad_norm": 1.818419337272644, "learning_rate": 9.946738143800632e-06, "loss": 0.911, "step": 2127 }, { "epoch": 0.0751647982904247, "grad_norm": 1.8041551113128662, "learning_rate": 9.946654842685452e-06, "loss": 0.9094, "step": 2128 }, { "epoch": 0.07520012009413261, "grad_norm": 1.8263940811157227, "learning_rate": 9.946571476829386e-06, "loss": 0.8956, "step": 2129 }, { "epoch": 0.07523544189784051, "grad_norm": 1.9322501420974731, "learning_rate": 9.946488046233522e-06, "loss": 0.9149, "step": 2130 }, { "epoch": 0.07527076370154842, "grad_norm": 1.8146899938583374, "learning_rate": 9.946404550898951e-06, "loss": 0.8754, "step": 2131 }, { "epoch": 0.07530608550525633, "grad_norm": 2.002013921737671, "learning_rate": 9.946320990826768e-06, "loss": 0.8786, "step": 2132 }, { "epoch": 0.07534140730896423, "grad_norm": 1.863393783569336, "learning_rate": 9.946237366018065e-06, "loss": 0.8962, "step": 2133 }, { "epoch": 0.07537672911267214, "grad_norm": 1.9914904832839966, "learning_rate": 9.946153676473938e-06, "loss": 0.8893, "step": 2134 }, { "epoch": 0.07541205091638005, "grad_norm": 1.958796501159668, "learning_rate": 9.946069922195483e-06, "loss": 0.9377, "step": 2135 }, { "epoch": 0.07544737272008795, "grad_norm": 1.7131226062774658, "learning_rate": 9.945986103183791e-06, "loss": 0.9124, "step": 2136 }, { "epoch": 0.07548269452379586, "grad_norm": 1.8617660999298096, "learning_rate": 9.945902219439965e-06, "loss": 0.912, "step": 2137 }, { "epoch": 0.07551801632750377, "grad_norm": 1.8842514753341675, "learning_rate": 9.9458182709651e-06, "loss": 0.9134, "step": 2138 }, { "epoch": 0.07555333813121166, "grad_norm": 1.297008752822876, "learning_rate": 9.945734257760296e-06, "loss": 0.6018, "step": 2139 }, { "epoch": 0.07558865993491957, "grad_norm": 1.8924627304077148, "learning_rate": 9.94565017982665e-06, "loss": 0.9284, "step": 2140 }, { "epoch": 0.07562398173862749, "grad_norm": 1.8361256122589111, "learning_rate": 9.945566037165265e-06, "loss": 0.9173, "step": 2141 }, { "epoch": 0.0756593035423354, "grad_norm": 1.8026047945022583, "learning_rate": 9.94548182977724e-06, "loss": 0.8972, "step": 2142 }, { "epoch": 0.07569462534604329, "grad_norm": 1.8994914293289185, "learning_rate": 9.94539755766368e-06, "loss": 0.9203, "step": 2143 }, { "epoch": 0.0757299471497512, "grad_norm": 1.8095287084579468, "learning_rate": 9.945313220825687e-06, "loss": 0.9089, "step": 2144 }, { "epoch": 0.07576526895345911, "grad_norm": 1.7214752435684204, "learning_rate": 9.945228819264364e-06, "loss": 0.8769, "step": 2145 }, { "epoch": 0.07580059075716701, "grad_norm": 1.8167023658752441, "learning_rate": 9.945144352980816e-06, "loss": 0.9068, "step": 2146 }, { "epoch": 0.07583591256087492, "grad_norm": 1.8295533657073975, "learning_rate": 9.945059821976146e-06, "loss": 0.9256, "step": 2147 }, { "epoch": 0.07587123436458283, "grad_norm": 1.8211671113967896, "learning_rate": 9.944975226251466e-06, "loss": 0.9035, "step": 2148 }, { "epoch": 0.07590655616829073, "grad_norm": 1.8443211317062378, "learning_rate": 9.944890565807876e-06, "loss": 0.9165, "step": 2149 }, { "epoch": 0.07594187797199864, "grad_norm": 1.9321413040161133, "learning_rate": 9.94480584064649e-06, "loss": 0.9234, "step": 2150 }, { "epoch": 0.07597719977570655, "grad_norm": 1.7444381713867188, "learning_rate": 9.944721050768414e-06, "loss": 0.8982, "step": 2151 }, { "epoch": 0.07601252157941445, "grad_norm": 2.0023105144500732, "learning_rate": 9.94463619617476e-06, "loss": 0.8995, "step": 2152 }, { "epoch": 0.07604784338312236, "grad_norm": 1.6864286661148071, "learning_rate": 9.944551276866634e-06, "loss": 0.8901, "step": 2153 }, { "epoch": 0.07608316518683027, "grad_norm": 1.7944986820220947, "learning_rate": 9.944466292845153e-06, "loss": 0.9083, "step": 2154 }, { "epoch": 0.07611848699053818, "grad_norm": 1.2740511894226074, "learning_rate": 9.944381244111427e-06, "loss": 0.5865, "step": 2155 }, { "epoch": 0.07615380879424608, "grad_norm": 1.9020627737045288, "learning_rate": 9.944296130666567e-06, "loss": 0.9001, "step": 2156 }, { "epoch": 0.07618913059795399, "grad_norm": 1.871384620666504, "learning_rate": 9.944210952511688e-06, "loss": 0.8817, "step": 2157 }, { "epoch": 0.0762244524016619, "grad_norm": 1.845901370048523, "learning_rate": 9.944125709647907e-06, "loss": 0.9289, "step": 2158 }, { "epoch": 0.07625977420536979, "grad_norm": 2.507162094116211, "learning_rate": 9.944040402076338e-06, "loss": 0.9328, "step": 2159 }, { "epoch": 0.0762950960090777, "grad_norm": 1.7967971563339233, "learning_rate": 9.943955029798098e-06, "loss": 0.8842, "step": 2160 }, { "epoch": 0.07633041781278561, "grad_norm": 1.9139680862426758, "learning_rate": 9.9438695928143e-06, "loss": 0.941, "step": 2161 }, { "epoch": 0.07636573961649351, "grad_norm": 1.9318472146987915, "learning_rate": 9.943784091126072e-06, "loss": 0.9733, "step": 2162 }, { "epoch": 0.07640106142020142, "grad_norm": 1.6469380855560303, "learning_rate": 9.943698524734524e-06, "loss": 0.8816, "step": 2163 }, { "epoch": 0.07643638322390933, "grad_norm": 2.104306936264038, "learning_rate": 9.94361289364078e-06, "loss": 0.9234, "step": 2164 }, { "epoch": 0.07647170502761723, "grad_norm": 1.9305511713027954, "learning_rate": 9.943527197845959e-06, "loss": 0.9417, "step": 2165 }, { "epoch": 0.07650702683132514, "grad_norm": 2.1090595722198486, "learning_rate": 9.943441437351184e-06, "loss": 0.877, "step": 2166 }, { "epoch": 0.07654234863503305, "grad_norm": 1.864294409751892, "learning_rate": 9.943355612157576e-06, "loss": 0.8761, "step": 2167 }, { "epoch": 0.07657767043874096, "grad_norm": 2.139045476913452, "learning_rate": 9.943269722266258e-06, "loss": 0.9125, "step": 2168 }, { "epoch": 0.07661299224244886, "grad_norm": 1.8851194381713867, "learning_rate": 9.943183767678357e-06, "loss": 0.9109, "step": 2169 }, { "epoch": 0.07664831404615677, "grad_norm": 1.7274137735366821, "learning_rate": 9.943097748394996e-06, "loss": 0.905, "step": 2170 }, { "epoch": 0.07668363584986468, "grad_norm": 1.7115981578826904, "learning_rate": 9.9430116644173e-06, "loss": 0.9033, "step": 2171 }, { "epoch": 0.07671895765357258, "grad_norm": 1.8489190340042114, "learning_rate": 9.942925515746397e-06, "loss": 0.907, "step": 2172 }, { "epoch": 0.07675427945728049, "grad_norm": 1.7215958833694458, "learning_rate": 9.942839302383414e-06, "loss": 0.9363, "step": 2173 }, { "epoch": 0.0767896012609884, "grad_norm": 1.86618971824646, "learning_rate": 9.942753024329479e-06, "loss": 0.9206, "step": 2174 }, { "epoch": 0.0768249230646963, "grad_norm": 1.928000807762146, "learning_rate": 9.942666681585723e-06, "loss": 0.9253, "step": 2175 }, { "epoch": 0.0768602448684042, "grad_norm": 1.8707233667373657, "learning_rate": 9.942580274153273e-06, "loss": 0.8994, "step": 2176 }, { "epoch": 0.07689556667211211, "grad_norm": 1.8871816396713257, "learning_rate": 9.942493802033262e-06, "loss": 0.8876, "step": 2177 }, { "epoch": 0.07693088847582001, "grad_norm": 1.8699580430984497, "learning_rate": 9.94240726522682e-06, "loss": 0.9046, "step": 2178 }, { "epoch": 0.07696621027952792, "grad_norm": 1.8432978391647339, "learning_rate": 9.942320663735081e-06, "loss": 0.871, "step": 2179 }, { "epoch": 0.07700153208323583, "grad_norm": 1.8843532800674438, "learning_rate": 9.94223399755918e-06, "loss": 0.9176, "step": 2180 }, { "epoch": 0.07703685388694374, "grad_norm": 1.8005136251449585, "learning_rate": 9.942147266700249e-06, "loss": 0.8544, "step": 2181 }, { "epoch": 0.07707217569065164, "grad_norm": 1.9137860536575317, "learning_rate": 9.942060471159422e-06, "loss": 0.9372, "step": 2182 }, { "epoch": 0.07710749749435955, "grad_norm": 1.3575941324234009, "learning_rate": 9.941973610937838e-06, "loss": 0.6365, "step": 2183 }, { "epoch": 0.07714281929806746, "grad_norm": 2.03840970993042, "learning_rate": 9.941886686036633e-06, "loss": 0.8824, "step": 2184 }, { "epoch": 0.07717814110177536, "grad_norm": 1.963069200515747, "learning_rate": 9.941799696456941e-06, "loss": 0.9226, "step": 2185 }, { "epoch": 0.07721346290548327, "grad_norm": 1.8242069482803345, "learning_rate": 9.941712642199907e-06, "loss": 0.899, "step": 2186 }, { "epoch": 0.07724878470919118, "grad_norm": 1.970212459564209, "learning_rate": 9.941625523266665e-06, "loss": 0.899, "step": 2187 }, { "epoch": 0.07728410651289908, "grad_norm": 2.2490038871765137, "learning_rate": 9.941538339658359e-06, "loss": 0.8809, "step": 2188 }, { "epoch": 0.07731942831660699, "grad_norm": 1.996262550354004, "learning_rate": 9.941451091376126e-06, "loss": 0.9439, "step": 2189 }, { "epoch": 0.0773547501203149, "grad_norm": 1.8686774969100952, "learning_rate": 9.94136377842111e-06, "loss": 0.8996, "step": 2190 }, { "epoch": 0.0773900719240228, "grad_norm": 1.9232170581817627, "learning_rate": 9.941276400794456e-06, "loss": 0.9049, "step": 2191 }, { "epoch": 0.0774253937277307, "grad_norm": 1.6967101097106934, "learning_rate": 9.941188958497304e-06, "loss": 0.9267, "step": 2192 }, { "epoch": 0.07746071553143861, "grad_norm": 1.9195324182510376, "learning_rate": 9.941101451530797e-06, "loss": 0.9098, "step": 2193 }, { "epoch": 0.07749603733514653, "grad_norm": 1.8089014291763306, "learning_rate": 9.941013879896087e-06, "loss": 0.9283, "step": 2194 }, { "epoch": 0.07753135913885442, "grad_norm": 1.6877347230911255, "learning_rate": 9.940926243594315e-06, "loss": 0.908, "step": 2195 }, { "epoch": 0.07756668094256233, "grad_norm": 1.8017189502716064, "learning_rate": 9.940838542626628e-06, "loss": 0.9195, "step": 2196 }, { "epoch": 0.07760200274627024, "grad_norm": 1.860213279724121, "learning_rate": 9.940750776994176e-06, "loss": 0.9064, "step": 2197 }, { "epoch": 0.07763732454997814, "grad_norm": 1.8909388780593872, "learning_rate": 9.940662946698106e-06, "loss": 0.8997, "step": 2198 }, { "epoch": 0.07767264635368605, "grad_norm": 1.7851824760437012, "learning_rate": 9.940575051739569e-06, "loss": 0.9106, "step": 2199 }, { "epoch": 0.07770796815739396, "grad_norm": 1.7859857082366943, "learning_rate": 9.940487092119712e-06, "loss": 0.8882, "step": 2200 }, { "epoch": 0.07774328996110186, "grad_norm": 1.8043967485427856, "learning_rate": 9.940399067839692e-06, "loss": 0.891, "step": 2201 }, { "epoch": 0.07777861176480977, "grad_norm": 1.7569726705551147, "learning_rate": 9.940310978900655e-06, "loss": 0.9115, "step": 2202 }, { "epoch": 0.07781393356851768, "grad_norm": 1.814471960067749, "learning_rate": 9.940222825303757e-06, "loss": 0.8929, "step": 2203 }, { "epoch": 0.07784925537222558, "grad_norm": 1.609951376914978, "learning_rate": 9.940134607050154e-06, "loss": 0.8597, "step": 2204 }, { "epoch": 0.07788457717593349, "grad_norm": 1.8164763450622559, "learning_rate": 9.940046324140997e-06, "loss": 0.8929, "step": 2205 }, { "epoch": 0.0779198989796414, "grad_norm": 1.8645530939102173, "learning_rate": 9.93995797657744e-06, "loss": 0.9149, "step": 2206 }, { "epoch": 0.07795522078334931, "grad_norm": 1.9068877696990967, "learning_rate": 9.939869564360643e-06, "loss": 0.9001, "step": 2207 }, { "epoch": 0.0779905425870572, "grad_norm": 1.7541999816894531, "learning_rate": 9.939781087491762e-06, "loss": 0.9052, "step": 2208 }, { "epoch": 0.07802586439076512, "grad_norm": 2.0653135776519775, "learning_rate": 9.939692545971954e-06, "loss": 0.9198, "step": 2209 }, { "epoch": 0.07806118619447303, "grad_norm": 1.8370534181594849, "learning_rate": 9.93960393980238e-06, "loss": 0.9006, "step": 2210 }, { "epoch": 0.07809650799818092, "grad_norm": 1.7916792631149292, "learning_rate": 9.939515268984197e-06, "loss": 0.9147, "step": 2211 }, { "epoch": 0.07813182980188883, "grad_norm": 1.8799809217453003, "learning_rate": 9.939426533518565e-06, "loss": 0.9071, "step": 2212 }, { "epoch": 0.07816715160559674, "grad_norm": 2.026313304901123, "learning_rate": 9.93933773340665e-06, "loss": 0.9504, "step": 2213 }, { "epoch": 0.07820247340930464, "grad_norm": 1.7842448949813843, "learning_rate": 9.93924886864961e-06, "loss": 0.8989, "step": 2214 }, { "epoch": 0.07823779521301255, "grad_norm": 1.7911045551300049, "learning_rate": 9.939159939248609e-06, "loss": 0.8942, "step": 2215 }, { "epoch": 0.07827311701672046, "grad_norm": 1.6884506940841675, "learning_rate": 9.939070945204812e-06, "loss": 0.9171, "step": 2216 }, { "epoch": 0.07830843882042836, "grad_norm": 1.8180207014083862, "learning_rate": 9.938981886519383e-06, "loss": 0.9025, "step": 2217 }, { "epoch": 0.07834376062413627, "grad_norm": 1.9637852907180786, "learning_rate": 9.938892763193486e-06, "loss": 0.8883, "step": 2218 }, { "epoch": 0.07837908242784418, "grad_norm": 1.7951768636703491, "learning_rate": 9.938803575228291e-06, "loss": 0.8576, "step": 2219 }, { "epoch": 0.07841440423155209, "grad_norm": 1.6801121234893799, "learning_rate": 9.93871432262496e-06, "loss": 0.8978, "step": 2220 }, { "epoch": 0.07844972603525999, "grad_norm": 2.001000165939331, "learning_rate": 9.938625005384668e-06, "loss": 0.8829, "step": 2221 }, { "epoch": 0.0784850478389679, "grad_norm": 1.8336732387542725, "learning_rate": 9.938535623508578e-06, "loss": 0.8879, "step": 2222 }, { "epoch": 0.07852036964267581, "grad_norm": 1.8771077394485474, "learning_rate": 9.938446176997862e-06, "loss": 0.9087, "step": 2223 }, { "epoch": 0.0785556914463837, "grad_norm": 1.859389305114746, "learning_rate": 9.938356665853692e-06, "loss": 0.9149, "step": 2224 }, { "epoch": 0.07859101325009162, "grad_norm": 2.0237743854522705, "learning_rate": 9.938267090077238e-06, "loss": 0.8686, "step": 2225 }, { "epoch": 0.07862633505379953, "grad_norm": 1.9439995288848877, "learning_rate": 9.938177449669673e-06, "loss": 0.9048, "step": 2226 }, { "epoch": 0.07866165685750742, "grad_norm": 1.2178130149841309, "learning_rate": 9.93808774463217e-06, "loss": 0.5793, "step": 2227 }, { "epoch": 0.07869697866121533, "grad_norm": 1.7849628925323486, "learning_rate": 9.9379979749659e-06, "loss": 0.8939, "step": 2228 }, { "epoch": 0.07873230046492324, "grad_norm": 1.8483054637908936, "learning_rate": 9.937908140672044e-06, "loss": 0.9143, "step": 2229 }, { "epoch": 0.07876762226863115, "grad_norm": 1.7097063064575195, "learning_rate": 9.937818241751773e-06, "loss": 0.9032, "step": 2230 }, { "epoch": 0.07880294407233905, "grad_norm": 1.8652151823043823, "learning_rate": 9.937728278206265e-06, "loss": 0.9414, "step": 2231 }, { "epoch": 0.07883826587604696, "grad_norm": 1.656803011894226, "learning_rate": 9.9376382500367e-06, "loss": 0.8753, "step": 2232 }, { "epoch": 0.07887358767975487, "grad_norm": 1.9168367385864258, "learning_rate": 9.93754815724425e-06, "loss": 0.8889, "step": 2233 }, { "epoch": 0.07890890948346277, "grad_norm": 2.201660633087158, "learning_rate": 9.9374579998301e-06, "loss": 0.9072, "step": 2234 }, { "epoch": 0.07894423128717068, "grad_norm": 2.1516456604003906, "learning_rate": 9.937367777795428e-06, "loss": 0.92, "step": 2235 }, { "epoch": 0.07897955309087859, "grad_norm": 1.9395428895950317, "learning_rate": 9.937277491141413e-06, "loss": 0.9509, "step": 2236 }, { "epoch": 0.07901487489458649, "grad_norm": 1.9964731931686401, "learning_rate": 9.937187139869239e-06, "loss": 0.9021, "step": 2237 }, { "epoch": 0.0790501966982944, "grad_norm": 1.9305363893508911, "learning_rate": 9.93709672398009e-06, "loss": 0.8947, "step": 2238 }, { "epoch": 0.07908551850200231, "grad_norm": 2.083566665649414, "learning_rate": 9.937006243475145e-06, "loss": 0.9478, "step": 2239 }, { "epoch": 0.0791208403057102, "grad_norm": 1.8783471584320068, "learning_rate": 9.93691569835559e-06, "loss": 0.9212, "step": 2240 }, { "epoch": 0.07915616210941812, "grad_norm": 1.7364581823349, "learning_rate": 9.93682508862261e-06, "loss": 0.8846, "step": 2241 }, { "epoch": 0.07919148391312603, "grad_norm": 1.908782720565796, "learning_rate": 9.936734414277393e-06, "loss": 0.911, "step": 2242 }, { "epoch": 0.07922680571683394, "grad_norm": 1.937802791595459, "learning_rate": 9.936643675321122e-06, "loss": 0.9318, "step": 2243 }, { "epoch": 0.07926212752054183, "grad_norm": 1.9018763303756714, "learning_rate": 9.936552871754989e-06, "loss": 0.8863, "step": 2244 }, { "epoch": 0.07929744932424974, "grad_norm": 1.730920433998108, "learning_rate": 9.936462003580178e-06, "loss": 0.8967, "step": 2245 }, { "epoch": 0.07933277112795765, "grad_norm": 1.7719743251800537, "learning_rate": 9.936371070797881e-06, "loss": 0.8992, "step": 2246 }, { "epoch": 0.07936809293166555, "grad_norm": 1.9877060651779175, "learning_rate": 9.936280073409288e-06, "loss": 0.8929, "step": 2247 }, { "epoch": 0.07940341473537346, "grad_norm": 1.801527738571167, "learning_rate": 9.93618901141559e-06, "loss": 0.9289, "step": 2248 }, { "epoch": 0.07943873653908137, "grad_norm": 1.7073978185653687, "learning_rate": 9.936097884817974e-06, "loss": 0.884, "step": 2249 }, { "epoch": 0.07947405834278927, "grad_norm": 1.728248953819275, "learning_rate": 9.936006693617641e-06, "loss": 0.9101, "step": 2250 }, { "epoch": 0.07950938014649718, "grad_norm": 1.8772478103637695, "learning_rate": 9.935915437815779e-06, "loss": 0.9173, "step": 2251 }, { "epoch": 0.07954470195020509, "grad_norm": 1.998494029045105, "learning_rate": 9.935824117413584e-06, "loss": 0.8775, "step": 2252 }, { "epoch": 0.07958002375391299, "grad_norm": 1.8915551900863647, "learning_rate": 9.935732732412249e-06, "loss": 0.8789, "step": 2253 }, { "epoch": 0.0796153455576209, "grad_norm": 1.8287984132766724, "learning_rate": 9.935641282812974e-06, "loss": 0.8604, "step": 2254 }, { "epoch": 0.07965066736132881, "grad_norm": 1.9613865613937378, "learning_rate": 9.935549768616952e-06, "loss": 0.934, "step": 2255 }, { "epoch": 0.07968598916503672, "grad_norm": 1.8336325883865356, "learning_rate": 9.935458189825383e-06, "loss": 0.9226, "step": 2256 }, { "epoch": 0.07972131096874462, "grad_norm": 1.902942419052124, "learning_rate": 9.935366546439465e-06, "loss": 0.8945, "step": 2257 }, { "epoch": 0.07975663277245253, "grad_norm": 2.272890090942383, "learning_rate": 9.935274838460398e-06, "loss": 0.8905, "step": 2258 }, { "epoch": 0.07979195457616044, "grad_norm": 1.8296674489974976, "learning_rate": 9.935183065889381e-06, "loss": 0.91, "step": 2259 }, { "epoch": 0.07982727637986833, "grad_norm": 1.7470530271530151, "learning_rate": 9.935091228727616e-06, "loss": 0.9253, "step": 2260 }, { "epoch": 0.07986259818357624, "grad_norm": 2.0017058849334717, "learning_rate": 9.934999326976305e-06, "loss": 0.8591, "step": 2261 }, { "epoch": 0.07989791998728416, "grad_norm": 1.8488808870315552, "learning_rate": 9.93490736063665e-06, "loss": 0.8943, "step": 2262 }, { "epoch": 0.07993324179099205, "grad_norm": 1.872636079788208, "learning_rate": 9.934815329709854e-06, "loss": 0.9079, "step": 2263 }, { "epoch": 0.07996856359469996, "grad_norm": 1.80549955368042, "learning_rate": 9.934723234197125e-06, "loss": 0.8739, "step": 2264 }, { "epoch": 0.08000388539840787, "grad_norm": 1.8076472282409668, "learning_rate": 9.934631074099665e-06, "loss": 0.9083, "step": 2265 }, { "epoch": 0.08003920720211577, "grad_norm": 1.6111232042312622, "learning_rate": 9.93453884941868e-06, "loss": 0.6378, "step": 2266 }, { "epoch": 0.08007452900582368, "grad_norm": 1.923499345779419, "learning_rate": 9.93444656015538e-06, "loss": 0.9012, "step": 2267 }, { "epoch": 0.08010985080953159, "grad_norm": 1.9941750764846802, "learning_rate": 9.93435420631097e-06, "loss": 0.9153, "step": 2268 }, { "epoch": 0.0801451726132395, "grad_norm": 1.8119713068008423, "learning_rate": 9.934261787886658e-06, "loss": 0.9091, "step": 2269 }, { "epoch": 0.0801804944169474, "grad_norm": 1.9619933366775513, "learning_rate": 9.934169304883659e-06, "loss": 0.8836, "step": 2270 }, { "epoch": 0.08021581622065531, "grad_norm": 1.8663601875305176, "learning_rate": 9.934076757303177e-06, "loss": 0.9099, "step": 2271 }, { "epoch": 0.08025113802436322, "grad_norm": 1.7860231399536133, "learning_rate": 9.933984145146427e-06, "loss": 0.9348, "step": 2272 }, { "epoch": 0.08028645982807112, "grad_norm": 1.840193748474121, "learning_rate": 9.933891468414617e-06, "loss": 0.9377, "step": 2273 }, { "epoch": 0.08032178163177903, "grad_norm": 1.904155969619751, "learning_rate": 9.933798727108968e-06, "loss": 0.9465, "step": 2274 }, { "epoch": 0.08035710343548694, "grad_norm": 1.699912190437317, "learning_rate": 9.933705921230685e-06, "loss": 0.9018, "step": 2275 }, { "epoch": 0.08039242523919483, "grad_norm": 1.6818742752075195, "learning_rate": 9.933613050780989e-06, "loss": 0.8785, "step": 2276 }, { "epoch": 0.08042774704290274, "grad_norm": 1.7139440774917603, "learning_rate": 9.933520115761091e-06, "loss": 0.8903, "step": 2277 }, { "epoch": 0.08046306884661066, "grad_norm": 1.978165626525879, "learning_rate": 9.933427116172208e-06, "loss": 0.9131, "step": 2278 }, { "epoch": 0.08049839065031855, "grad_norm": 1.9006884098052979, "learning_rate": 9.93333405201556e-06, "loss": 0.9121, "step": 2279 }, { "epoch": 0.08053371245402646, "grad_norm": 1.8563334941864014, "learning_rate": 9.933240923292363e-06, "loss": 0.9072, "step": 2280 }, { "epoch": 0.08056903425773437, "grad_norm": 1.9014323949813843, "learning_rate": 9.933147730003836e-06, "loss": 0.8877, "step": 2281 }, { "epoch": 0.08060435606144228, "grad_norm": 1.9663364887237549, "learning_rate": 9.9330544721512e-06, "loss": 0.9414, "step": 2282 }, { "epoch": 0.08063967786515018, "grad_norm": 1.7979904413223267, "learning_rate": 9.932961149735674e-06, "loss": 0.8883, "step": 2283 }, { "epoch": 0.08067499966885809, "grad_norm": 1.8933027982711792, "learning_rate": 9.932867762758477e-06, "loss": 0.9273, "step": 2284 }, { "epoch": 0.080710321472566, "grad_norm": 1.6848361492156982, "learning_rate": 9.932774311220838e-06, "loss": 0.9043, "step": 2285 }, { "epoch": 0.0807456432762739, "grad_norm": 1.8050012588500977, "learning_rate": 9.932680795123974e-06, "loss": 0.8942, "step": 2286 }, { "epoch": 0.08078096507998181, "grad_norm": 1.778310775756836, "learning_rate": 9.93258721446911e-06, "loss": 0.9062, "step": 2287 }, { "epoch": 0.08081628688368972, "grad_norm": 2.1540470123291016, "learning_rate": 9.932493569257474e-06, "loss": 0.9613, "step": 2288 }, { "epoch": 0.08085160868739762, "grad_norm": 1.7015295028686523, "learning_rate": 9.932399859490288e-06, "loss": 0.9004, "step": 2289 }, { "epoch": 0.08088693049110553, "grad_norm": 1.8428988456726074, "learning_rate": 9.93230608516878e-06, "loss": 0.9354, "step": 2290 }, { "epoch": 0.08092225229481344, "grad_norm": 1.8568013906478882, "learning_rate": 9.932212246294178e-06, "loss": 0.9311, "step": 2291 }, { "epoch": 0.08095757409852133, "grad_norm": 1.694456696510315, "learning_rate": 9.932118342867707e-06, "loss": 0.9363, "step": 2292 }, { "epoch": 0.08099289590222924, "grad_norm": 1.7368049621582031, "learning_rate": 9.9320243748906e-06, "loss": 0.8872, "step": 2293 }, { "epoch": 0.08102821770593716, "grad_norm": 1.7765307426452637, "learning_rate": 9.931930342364086e-06, "loss": 0.8301, "step": 2294 }, { "epoch": 0.08106353950964507, "grad_norm": 2.0049469470977783, "learning_rate": 9.931836245289393e-06, "loss": 0.917, "step": 2295 }, { "epoch": 0.08109886131335296, "grad_norm": 2.0231828689575195, "learning_rate": 9.931742083667756e-06, "loss": 0.9282, "step": 2296 }, { "epoch": 0.08113418311706087, "grad_norm": 1.769631266593933, "learning_rate": 9.931647857500404e-06, "loss": 0.9145, "step": 2297 }, { "epoch": 0.08116950492076878, "grad_norm": 1.8323936462402344, "learning_rate": 9.931553566788573e-06, "loss": 0.9137, "step": 2298 }, { "epoch": 0.08120482672447668, "grad_norm": 1.8250178098678589, "learning_rate": 9.931459211533496e-06, "loss": 0.918, "step": 2299 }, { "epoch": 0.08124014852818459, "grad_norm": 1.7591423988342285, "learning_rate": 9.931364791736406e-06, "loss": 0.8827, "step": 2300 }, { "epoch": 0.0812754703318925, "grad_norm": 1.8987375497817993, "learning_rate": 9.931270307398543e-06, "loss": 0.9233, "step": 2301 }, { "epoch": 0.0813107921356004, "grad_norm": 1.9419291019439697, "learning_rate": 9.93117575852114e-06, "loss": 0.92, "step": 2302 }, { "epoch": 0.08134611393930831, "grad_norm": 2.0741043090820312, "learning_rate": 9.931081145105436e-06, "loss": 0.9233, "step": 2303 }, { "epoch": 0.08138143574301622, "grad_norm": 1.9025962352752686, "learning_rate": 9.930986467152667e-06, "loss": 0.8886, "step": 2304 }, { "epoch": 0.08141675754672412, "grad_norm": 1.9210155010223389, "learning_rate": 9.930891724664078e-06, "loss": 0.9107, "step": 2305 }, { "epoch": 0.08145207935043203, "grad_norm": 1.907102108001709, "learning_rate": 9.930796917640903e-06, "loss": 0.8675, "step": 2306 }, { "epoch": 0.08148740115413994, "grad_norm": 1.9306530952453613, "learning_rate": 9.930702046084384e-06, "loss": 0.8992, "step": 2307 }, { "epoch": 0.08152272295784785, "grad_norm": 1.7838348150253296, "learning_rate": 9.930607109995765e-06, "loss": 0.8682, "step": 2308 }, { "epoch": 0.08155804476155575, "grad_norm": 1.6659282445907593, "learning_rate": 9.930512109376284e-06, "loss": 0.9476, "step": 2309 }, { "epoch": 0.08159336656526366, "grad_norm": 1.6812586784362793, "learning_rate": 9.93041704422719e-06, "loss": 0.8607, "step": 2310 }, { "epoch": 0.08162868836897157, "grad_norm": 1.9538745880126953, "learning_rate": 9.930321914549726e-06, "loss": 0.8886, "step": 2311 }, { "epoch": 0.08166401017267946, "grad_norm": 1.7102723121643066, "learning_rate": 9.930226720345133e-06, "loss": 0.9007, "step": 2312 }, { "epoch": 0.08169933197638737, "grad_norm": 1.817542314529419, "learning_rate": 9.93013146161466e-06, "loss": 0.8734, "step": 2313 }, { "epoch": 0.08173465378009528, "grad_norm": 1.9457814693450928, "learning_rate": 9.930036138359554e-06, "loss": 0.942, "step": 2314 }, { "epoch": 0.08176997558380318, "grad_norm": 1.975312352180481, "learning_rate": 9.929940750581059e-06, "loss": 0.9151, "step": 2315 }, { "epoch": 0.08180529738751109, "grad_norm": 2.0783116817474365, "learning_rate": 9.929845298280429e-06, "loss": 0.9204, "step": 2316 }, { "epoch": 0.081840619191219, "grad_norm": 2.06982159614563, "learning_rate": 9.929749781458909e-06, "loss": 0.9066, "step": 2317 }, { "epoch": 0.0818759409949269, "grad_norm": 2.163360834121704, "learning_rate": 9.929654200117751e-06, "loss": 0.9193, "step": 2318 }, { "epoch": 0.08191126279863481, "grad_norm": 2.139050006866455, "learning_rate": 9.929558554258207e-06, "loss": 0.8992, "step": 2319 }, { "epoch": 0.08194658460234272, "grad_norm": 1.757525086402893, "learning_rate": 9.929462843881525e-06, "loss": 0.9125, "step": 2320 }, { "epoch": 0.08198190640605063, "grad_norm": 2.4237539768218994, "learning_rate": 9.929367068988962e-06, "loss": 0.8981, "step": 2321 }, { "epoch": 0.08201722820975853, "grad_norm": 1.6916838884353638, "learning_rate": 9.929271229581767e-06, "loss": 0.8922, "step": 2322 }, { "epoch": 0.08205255001346644, "grad_norm": 1.810025930404663, "learning_rate": 9.929175325661198e-06, "loss": 0.8968, "step": 2323 }, { "epoch": 0.08208787181717435, "grad_norm": 1.855526089668274, "learning_rate": 9.92907935722851e-06, "loss": 0.8807, "step": 2324 }, { "epoch": 0.08212319362088225, "grad_norm": 1.834709882736206, "learning_rate": 9.928983324284957e-06, "loss": 0.8801, "step": 2325 }, { "epoch": 0.08215851542459016, "grad_norm": 1.7921828031539917, "learning_rate": 9.928887226831796e-06, "loss": 0.8911, "step": 2326 }, { "epoch": 0.08219383722829807, "grad_norm": 1.8058099746704102, "learning_rate": 9.928791064870287e-06, "loss": 0.8986, "step": 2327 }, { "epoch": 0.08222915903200596, "grad_norm": 1.810152530670166, "learning_rate": 9.928694838401687e-06, "loss": 0.8989, "step": 2328 }, { "epoch": 0.08226448083571387, "grad_norm": 2.00410795211792, "learning_rate": 9.928598547427254e-06, "loss": 0.9337, "step": 2329 }, { "epoch": 0.08229980263942178, "grad_norm": 1.8496454954147339, "learning_rate": 9.92850219194825e-06, "loss": 0.8899, "step": 2330 }, { "epoch": 0.0823351244431297, "grad_norm": 2.3411824703216553, "learning_rate": 9.928405771965936e-06, "loss": 0.9015, "step": 2331 }, { "epoch": 0.08237044624683759, "grad_norm": 2.006242275238037, "learning_rate": 9.928309287481575e-06, "loss": 0.8843, "step": 2332 }, { "epoch": 0.0824057680505455, "grad_norm": 1.8560177087783813, "learning_rate": 9.928212738496426e-06, "loss": 0.8943, "step": 2333 }, { "epoch": 0.08244108985425341, "grad_norm": 2.0507187843322754, "learning_rate": 9.928116125011757e-06, "loss": 0.9126, "step": 2334 }, { "epoch": 0.08247641165796131, "grad_norm": 1.715224266052246, "learning_rate": 9.92801944702883e-06, "loss": 0.9076, "step": 2335 }, { "epoch": 0.08251173346166922, "grad_norm": 1.7918972969055176, "learning_rate": 9.927922704548911e-06, "loss": 0.8762, "step": 2336 }, { "epoch": 0.08254705526537713, "grad_norm": 1.9431021213531494, "learning_rate": 9.927825897573267e-06, "loss": 0.9091, "step": 2337 }, { "epoch": 0.08258237706908503, "grad_norm": 1.6679753065109253, "learning_rate": 9.927729026103161e-06, "loss": 0.8713, "step": 2338 }, { "epoch": 0.08261769887279294, "grad_norm": 2.0115342140197754, "learning_rate": 9.927632090139868e-06, "loss": 0.925, "step": 2339 }, { "epoch": 0.08265302067650085, "grad_norm": 1.7903251647949219, "learning_rate": 9.927535089684648e-06, "loss": 0.9534, "step": 2340 }, { "epoch": 0.08268834248020875, "grad_norm": 2.0229134559631348, "learning_rate": 9.927438024738778e-06, "loss": 0.8856, "step": 2341 }, { "epoch": 0.08272366428391666, "grad_norm": 2.172125816345215, "learning_rate": 9.927340895303524e-06, "loss": 0.8681, "step": 2342 }, { "epoch": 0.08275898608762457, "grad_norm": 1.7745418548583984, "learning_rate": 9.92724370138016e-06, "loss": 0.907, "step": 2343 }, { "epoch": 0.08279430789133248, "grad_norm": 1.7315926551818848, "learning_rate": 9.927146442969955e-06, "loss": 0.8754, "step": 2344 }, { "epoch": 0.08282962969504037, "grad_norm": 1.7162550687789917, "learning_rate": 9.927049120074185e-06, "loss": 0.8912, "step": 2345 }, { "epoch": 0.08286495149874828, "grad_norm": 1.7576342821121216, "learning_rate": 9.92695173269412e-06, "loss": 0.9343, "step": 2346 }, { "epoch": 0.0829002733024562, "grad_norm": 2.0660855770111084, "learning_rate": 9.92685428083104e-06, "loss": 0.9092, "step": 2347 }, { "epoch": 0.08293559510616409, "grad_norm": 1.804836392402649, "learning_rate": 9.926756764486216e-06, "loss": 0.8733, "step": 2348 }, { "epoch": 0.082970916909872, "grad_norm": 1.8948200941085815, "learning_rate": 9.926659183660925e-06, "loss": 0.9291, "step": 2349 }, { "epoch": 0.08300623871357991, "grad_norm": 1.787217617034912, "learning_rate": 9.926561538356446e-06, "loss": 0.9117, "step": 2350 }, { "epoch": 0.08304156051728781, "grad_norm": 1.8268510103225708, "learning_rate": 9.926463828574054e-06, "loss": 0.9117, "step": 2351 }, { "epoch": 0.08307688232099572, "grad_norm": 1.7478489875793457, "learning_rate": 9.926366054315031e-06, "loss": 0.9153, "step": 2352 }, { "epoch": 0.08311220412470363, "grad_norm": 1.7347991466522217, "learning_rate": 9.926268215580655e-06, "loss": 0.9018, "step": 2353 }, { "epoch": 0.08314752592841153, "grad_norm": 1.8965933322906494, "learning_rate": 9.926170312372205e-06, "loss": 0.9381, "step": 2354 }, { "epoch": 0.08318284773211944, "grad_norm": 1.7841997146606445, "learning_rate": 9.926072344690966e-06, "loss": 0.909, "step": 2355 }, { "epoch": 0.08321816953582735, "grad_norm": 1.7522283792495728, "learning_rate": 9.925974312538217e-06, "loss": 0.9212, "step": 2356 }, { "epoch": 0.08325349133953526, "grad_norm": 1.672288417816162, "learning_rate": 9.925876215915243e-06, "loss": 0.8843, "step": 2357 }, { "epoch": 0.08328881314324316, "grad_norm": 1.7032300233840942, "learning_rate": 9.925778054823325e-06, "loss": 0.9206, "step": 2358 }, { "epoch": 0.08332413494695107, "grad_norm": 1.9932310581207275, "learning_rate": 9.925679829263753e-06, "loss": 0.9263, "step": 2359 }, { "epoch": 0.08335945675065898, "grad_norm": 1.9558700323104858, "learning_rate": 9.925581539237807e-06, "loss": 0.9027, "step": 2360 }, { "epoch": 0.08339477855436687, "grad_norm": 1.7152984142303467, "learning_rate": 9.925483184746777e-06, "loss": 0.8551, "step": 2361 }, { "epoch": 0.08343010035807479, "grad_norm": 1.7976701259613037, "learning_rate": 9.925384765791948e-06, "loss": 0.9112, "step": 2362 }, { "epoch": 0.0834654221617827, "grad_norm": 1.6528726816177368, "learning_rate": 9.92528628237461e-06, "loss": 0.9167, "step": 2363 }, { "epoch": 0.08350074396549059, "grad_norm": 1.816356897354126, "learning_rate": 9.925187734496049e-06, "loss": 0.9073, "step": 2364 }, { "epoch": 0.0835360657691985, "grad_norm": 1.3464794158935547, "learning_rate": 9.925089122157558e-06, "loss": 0.6089, "step": 2365 }, { "epoch": 0.08357138757290641, "grad_norm": 1.8963334560394287, "learning_rate": 9.924990445360427e-06, "loss": 0.8921, "step": 2366 }, { "epoch": 0.08360670937661431, "grad_norm": 2.029137134552002, "learning_rate": 9.924891704105945e-06, "loss": 0.8643, "step": 2367 }, { "epoch": 0.08364203118032222, "grad_norm": 1.7881362438201904, "learning_rate": 9.924792898395407e-06, "loss": 0.921, "step": 2368 }, { "epoch": 0.08367735298403013, "grad_norm": 1.7651103734970093, "learning_rate": 9.924694028230106e-06, "loss": 0.9257, "step": 2369 }, { "epoch": 0.08371267478773804, "grad_norm": 1.6812171936035156, "learning_rate": 9.924595093611335e-06, "loss": 0.8617, "step": 2370 }, { "epoch": 0.08374799659144594, "grad_norm": 1.7522331476211548, "learning_rate": 9.92449609454039e-06, "loss": 0.8893, "step": 2371 }, { "epoch": 0.08378331839515385, "grad_norm": 1.7753374576568604, "learning_rate": 9.924397031018565e-06, "loss": 0.8856, "step": 2372 }, { "epoch": 0.08381864019886176, "grad_norm": 1.8462001085281372, "learning_rate": 9.924297903047157e-06, "loss": 0.887, "step": 2373 }, { "epoch": 0.08385396200256966, "grad_norm": 1.653271198272705, "learning_rate": 9.924198710627465e-06, "loss": 0.8878, "step": 2374 }, { "epoch": 0.08388928380627757, "grad_norm": 1.91096031665802, "learning_rate": 9.924099453760786e-06, "loss": 0.8846, "step": 2375 }, { "epoch": 0.08392460560998548, "grad_norm": 1.8270292282104492, "learning_rate": 9.924000132448418e-06, "loss": 0.9105, "step": 2376 }, { "epoch": 0.08395992741369337, "grad_norm": 1.6460767984390259, "learning_rate": 9.923900746691659e-06, "loss": 0.9005, "step": 2377 }, { "epoch": 0.08399524921740129, "grad_norm": 1.686915636062622, "learning_rate": 9.923801296491815e-06, "loss": 0.916, "step": 2378 }, { "epoch": 0.0840305710211092, "grad_norm": 1.6953368186950684, "learning_rate": 9.923701781850186e-06, "loss": 0.9144, "step": 2379 }, { "epoch": 0.08406589282481709, "grad_norm": 1.726345181465149, "learning_rate": 9.923602202768075e-06, "loss": 0.9037, "step": 2380 }, { "epoch": 0.084101214628525, "grad_norm": 1.936469316482544, "learning_rate": 9.923502559246781e-06, "loss": 0.8949, "step": 2381 }, { "epoch": 0.08413653643223291, "grad_norm": 1.8888963460922241, "learning_rate": 9.923402851287613e-06, "loss": 0.9179, "step": 2382 }, { "epoch": 0.08417185823594082, "grad_norm": 1.8466933965682983, "learning_rate": 9.923303078891873e-06, "loss": 0.9217, "step": 2383 }, { "epoch": 0.08420718003964872, "grad_norm": 1.7830586433410645, "learning_rate": 9.923203242060868e-06, "loss": 0.9281, "step": 2384 }, { "epoch": 0.08424250184335663, "grad_norm": 2.1677584648132324, "learning_rate": 9.923103340795905e-06, "loss": 0.9352, "step": 2385 }, { "epoch": 0.08427782364706454, "grad_norm": 1.9999831914901733, "learning_rate": 9.92300337509829e-06, "loss": 0.8652, "step": 2386 }, { "epoch": 0.08431314545077244, "grad_norm": 1.9341583251953125, "learning_rate": 9.922903344969332e-06, "loss": 0.915, "step": 2387 }, { "epoch": 0.08434846725448035, "grad_norm": 1.8274027109146118, "learning_rate": 9.922803250410342e-06, "loss": 0.9069, "step": 2388 }, { "epoch": 0.08438378905818826, "grad_norm": 1.9085595607757568, "learning_rate": 9.922703091422628e-06, "loss": 0.8954, "step": 2389 }, { "epoch": 0.08441911086189616, "grad_norm": 2.124673366546631, "learning_rate": 9.9226028680075e-06, "loss": 0.8902, "step": 2390 }, { "epoch": 0.08445443266560407, "grad_norm": 1.9276145696640015, "learning_rate": 9.922502580166272e-06, "loss": 0.8795, "step": 2391 }, { "epoch": 0.08448975446931198, "grad_norm": 2.0543506145477295, "learning_rate": 9.922402227900254e-06, "loss": 0.9491, "step": 2392 }, { "epoch": 0.08452507627301988, "grad_norm": 1.8532510995864868, "learning_rate": 9.922301811210764e-06, "loss": 0.9142, "step": 2393 }, { "epoch": 0.08456039807672779, "grad_norm": 1.9245823621749878, "learning_rate": 9.92220133009911e-06, "loss": 0.9087, "step": 2394 }, { "epoch": 0.0845957198804357, "grad_norm": 1.9521878957748413, "learning_rate": 9.922100784566613e-06, "loss": 0.9129, "step": 2395 }, { "epoch": 0.08463104168414361, "grad_norm": 1.1805851459503174, "learning_rate": 9.922000174614584e-06, "loss": 0.5891, "step": 2396 }, { "epoch": 0.0846663634878515, "grad_norm": 2.013009548187256, "learning_rate": 9.921899500244343e-06, "loss": 0.9285, "step": 2397 }, { "epoch": 0.08470168529155941, "grad_norm": 2.095270872116089, "learning_rate": 9.921798761457207e-06, "loss": 0.9322, "step": 2398 }, { "epoch": 0.08473700709526732, "grad_norm": 1.830929160118103, "learning_rate": 9.921697958254493e-06, "loss": 0.9348, "step": 2399 }, { "epoch": 0.08477232889897522, "grad_norm": 1.7699681520462036, "learning_rate": 9.921597090637521e-06, "loss": 0.8758, "step": 2400 }, { "epoch": 0.08480765070268313, "grad_norm": 1.787065029144287, "learning_rate": 9.921496158607613e-06, "loss": 0.8875, "step": 2401 }, { "epoch": 0.08484297250639104, "grad_norm": 1.796931266784668, "learning_rate": 9.921395162166088e-06, "loss": 0.9256, "step": 2402 }, { "epoch": 0.08487829431009894, "grad_norm": 1.9062824249267578, "learning_rate": 9.921294101314268e-06, "loss": 0.8954, "step": 2403 }, { "epoch": 0.08491361611380685, "grad_norm": 1.9705181121826172, "learning_rate": 9.921192976053476e-06, "loss": 0.9031, "step": 2404 }, { "epoch": 0.08494893791751476, "grad_norm": 1.7509170770645142, "learning_rate": 9.921091786385033e-06, "loss": 0.8928, "step": 2405 }, { "epoch": 0.08498425972122266, "grad_norm": 1.0841439962387085, "learning_rate": 9.920990532310268e-06, "loss": 0.5716, "step": 2406 }, { "epoch": 0.08501958152493057, "grad_norm": 1.7752149105072021, "learning_rate": 9.920889213830505e-06, "loss": 0.8713, "step": 2407 }, { "epoch": 0.08505490332863848, "grad_norm": 1.8694400787353516, "learning_rate": 9.920787830947068e-06, "loss": 0.8911, "step": 2408 }, { "epoch": 0.08509022513234639, "grad_norm": 1.8261042833328247, "learning_rate": 9.920686383661285e-06, "loss": 0.897, "step": 2409 }, { "epoch": 0.08512554693605429, "grad_norm": 1.8985605239868164, "learning_rate": 9.920584871974484e-06, "loss": 0.9302, "step": 2410 }, { "epoch": 0.0851608687397622, "grad_norm": 1.9659579992294312, "learning_rate": 9.920483295887991e-06, "loss": 0.9171, "step": 2411 }, { "epoch": 0.08519619054347011, "grad_norm": 1.870181918144226, "learning_rate": 9.92038165540314e-06, "loss": 0.9252, "step": 2412 }, { "epoch": 0.085231512347178, "grad_norm": 1.849777102470398, "learning_rate": 9.920279950521257e-06, "loss": 0.9021, "step": 2413 }, { "epoch": 0.08526683415088591, "grad_norm": 2.0187227725982666, "learning_rate": 9.920178181243675e-06, "loss": 0.8847, "step": 2414 }, { "epoch": 0.08530215595459383, "grad_norm": 1.788246989250183, "learning_rate": 9.920076347571728e-06, "loss": 0.9043, "step": 2415 }, { "epoch": 0.08533747775830172, "grad_norm": 1.8947848081588745, "learning_rate": 9.919974449506745e-06, "loss": 0.9146, "step": 2416 }, { "epoch": 0.08537279956200963, "grad_norm": 2.1423609256744385, "learning_rate": 9.919872487050062e-06, "loss": 0.94, "step": 2417 }, { "epoch": 0.08540812136571754, "grad_norm": 1.7274123430252075, "learning_rate": 9.919770460203011e-06, "loss": 0.9262, "step": 2418 }, { "epoch": 0.08544344316942544, "grad_norm": 1.9060922861099243, "learning_rate": 9.919668368966932e-06, "loss": 0.8748, "step": 2419 }, { "epoch": 0.08547876497313335, "grad_norm": 1.8957332372665405, "learning_rate": 9.919566213343155e-06, "loss": 0.8962, "step": 2420 }, { "epoch": 0.08551408677684126, "grad_norm": 2.1857638359069824, "learning_rate": 9.919463993333023e-06, "loss": 0.8855, "step": 2421 }, { "epoch": 0.08554940858054917, "grad_norm": 1.6062438488006592, "learning_rate": 9.91936170893787e-06, "loss": 0.9097, "step": 2422 }, { "epoch": 0.08558473038425707, "grad_norm": 1.7213876247406006, "learning_rate": 9.919259360159036e-06, "loss": 0.9306, "step": 2423 }, { "epoch": 0.08562005218796498, "grad_norm": 1.6870845556259155, "learning_rate": 9.91915694699786e-06, "loss": 0.9011, "step": 2424 }, { "epoch": 0.08565537399167289, "grad_norm": 1.7415308952331543, "learning_rate": 9.91905446945568e-06, "loss": 0.8769, "step": 2425 }, { "epoch": 0.08569069579538079, "grad_norm": 1.8221609592437744, "learning_rate": 9.918951927533843e-06, "loss": 0.9162, "step": 2426 }, { "epoch": 0.0857260175990887, "grad_norm": 1.7541847229003906, "learning_rate": 9.918849321233686e-06, "loss": 0.8779, "step": 2427 }, { "epoch": 0.08576133940279661, "grad_norm": 1.9042389392852783, "learning_rate": 9.918746650556555e-06, "loss": 0.8955, "step": 2428 }, { "epoch": 0.0857966612065045, "grad_norm": 1.7372193336486816, "learning_rate": 9.918643915503792e-06, "loss": 0.9061, "step": 2429 }, { "epoch": 0.08583198301021241, "grad_norm": 2.0006821155548096, "learning_rate": 9.918541116076742e-06, "loss": 0.9158, "step": 2430 }, { "epoch": 0.08586730481392033, "grad_norm": 1.7924680709838867, "learning_rate": 9.91843825227675e-06, "loss": 0.8951, "step": 2431 }, { "epoch": 0.08590262661762824, "grad_norm": 1.800036072731018, "learning_rate": 9.918335324105164e-06, "loss": 0.9118, "step": 2432 }, { "epoch": 0.08593794842133613, "grad_norm": 1.7839336395263672, "learning_rate": 9.91823233156333e-06, "loss": 0.9026, "step": 2433 }, { "epoch": 0.08597327022504404, "grad_norm": 1.7544376850128174, "learning_rate": 9.918129274652593e-06, "loss": 0.9122, "step": 2434 }, { "epoch": 0.08600859202875195, "grad_norm": 1.821824073791504, "learning_rate": 9.918026153374307e-06, "loss": 0.9108, "step": 2435 }, { "epoch": 0.08604391383245985, "grad_norm": 1.6770328283309937, "learning_rate": 9.917922967729818e-06, "loss": 0.8676, "step": 2436 }, { "epoch": 0.08607923563616776, "grad_norm": 1.7355724573135376, "learning_rate": 9.917819717720478e-06, "loss": 0.8912, "step": 2437 }, { "epoch": 0.08611455743987567, "grad_norm": 2.250760793685913, "learning_rate": 9.917716403347639e-06, "loss": 0.9317, "step": 2438 }, { "epoch": 0.08614987924358357, "grad_norm": 1.6455897092819214, "learning_rate": 9.917613024612652e-06, "loss": 0.9204, "step": 2439 }, { "epoch": 0.08618520104729148, "grad_norm": 1.7394349575042725, "learning_rate": 9.917509581516869e-06, "loss": 0.9058, "step": 2440 }, { "epoch": 0.08622052285099939, "grad_norm": 1.936294674873352, "learning_rate": 9.917406074061644e-06, "loss": 0.8825, "step": 2441 }, { "epoch": 0.08625584465470729, "grad_norm": 1.9258472919464111, "learning_rate": 9.917302502248335e-06, "loss": 0.9086, "step": 2442 }, { "epoch": 0.0862911664584152, "grad_norm": 1.6305819749832153, "learning_rate": 9.917198866078295e-06, "loss": 0.9021, "step": 2443 }, { "epoch": 0.08632648826212311, "grad_norm": 1.6749277114868164, "learning_rate": 9.91709516555288e-06, "loss": 0.8944, "step": 2444 }, { "epoch": 0.08636181006583102, "grad_norm": 1.8525999784469604, "learning_rate": 9.916991400673447e-06, "loss": 0.8985, "step": 2445 }, { "epoch": 0.08639713186953892, "grad_norm": 1.6306012868881226, "learning_rate": 9.916887571441356e-06, "loss": 0.8799, "step": 2446 }, { "epoch": 0.08643245367324683, "grad_norm": 1.8723351955413818, "learning_rate": 9.916783677857964e-06, "loss": 0.9101, "step": 2447 }, { "epoch": 0.08646777547695474, "grad_norm": 1.7266013622283936, "learning_rate": 9.91667971992463e-06, "loss": 0.927, "step": 2448 }, { "epoch": 0.08650309728066263, "grad_norm": 1.8841769695281982, "learning_rate": 9.91657569764272e-06, "loss": 0.9052, "step": 2449 }, { "epoch": 0.08653841908437054, "grad_norm": 1.7102030515670776, "learning_rate": 9.91647161101359e-06, "loss": 0.8878, "step": 2450 }, { "epoch": 0.08657374088807845, "grad_norm": 1.8956600427627563, "learning_rate": 9.916367460038602e-06, "loss": 0.9013, "step": 2451 }, { "epoch": 0.08660906269178635, "grad_norm": 1.9102959632873535, "learning_rate": 9.916263244719122e-06, "loss": 0.8982, "step": 2452 }, { "epoch": 0.08664438449549426, "grad_norm": 1.6543817520141602, "learning_rate": 9.916158965056512e-06, "loss": 0.8864, "step": 2453 }, { "epoch": 0.08667970629920217, "grad_norm": 1.8813201189041138, "learning_rate": 9.916054621052139e-06, "loss": 0.9061, "step": 2454 }, { "epoch": 0.08671502810291007, "grad_norm": 1.854653000831604, "learning_rate": 9.915950212707366e-06, "loss": 0.9231, "step": 2455 }, { "epoch": 0.08675034990661798, "grad_norm": 2.168501615524292, "learning_rate": 9.915845740023562e-06, "loss": 0.8543, "step": 2456 }, { "epoch": 0.08678567171032589, "grad_norm": 1.8675696849822998, "learning_rate": 9.915741203002092e-06, "loss": 0.9396, "step": 2457 }, { "epoch": 0.0868209935140338, "grad_norm": 1.7816344499588013, "learning_rate": 9.915636601644325e-06, "loss": 0.9029, "step": 2458 }, { "epoch": 0.0868563153177417, "grad_norm": 1.946414828300476, "learning_rate": 9.915531935951632e-06, "loss": 0.8779, "step": 2459 }, { "epoch": 0.08689163712144961, "grad_norm": 1.832387089729309, "learning_rate": 9.915427205925379e-06, "loss": 0.9457, "step": 2460 }, { "epoch": 0.08692695892515752, "grad_norm": 1.7786425352096558, "learning_rate": 9.91532241156694e-06, "loss": 0.9337, "step": 2461 }, { "epoch": 0.08696228072886542, "grad_norm": 1.874884009361267, "learning_rate": 9.915217552877686e-06, "loss": 0.8855, "step": 2462 }, { "epoch": 0.08699760253257333, "grad_norm": 1.813303828239441, "learning_rate": 9.915112629858986e-06, "loss": 0.9201, "step": 2463 }, { "epoch": 0.08703292433628124, "grad_norm": 1.79142165184021, "learning_rate": 9.915007642512217e-06, "loss": 0.8866, "step": 2464 }, { "epoch": 0.08706824613998913, "grad_norm": 1.831459879875183, "learning_rate": 9.914902590838752e-06, "loss": 0.8999, "step": 2465 }, { "epoch": 0.08710356794369704, "grad_norm": 2.295285224914551, "learning_rate": 9.914797474839966e-06, "loss": 0.9301, "step": 2466 }, { "epoch": 0.08713888974740495, "grad_norm": 1.7537814378738403, "learning_rate": 9.914692294517233e-06, "loss": 0.8955, "step": 2467 }, { "epoch": 0.08717421155111285, "grad_norm": 1.9543547630310059, "learning_rate": 9.914587049871934e-06, "loss": 0.8999, "step": 2468 }, { "epoch": 0.08720953335482076, "grad_norm": 1.7737094163894653, "learning_rate": 9.914481740905441e-06, "loss": 0.8869, "step": 2469 }, { "epoch": 0.08724485515852867, "grad_norm": 1.71476411819458, "learning_rate": 9.914376367619137e-06, "loss": 0.862, "step": 2470 }, { "epoch": 0.08728017696223658, "grad_norm": 1.8308045864105225, "learning_rate": 9.914270930014397e-06, "loss": 0.8913, "step": 2471 }, { "epoch": 0.08731549876594448, "grad_norm": 2.3387913703918457, "learning_rate": 9.914165428092603e-06, "loss": 0.879, "step": 2472 }, { "epoch": 0.08735082056965239, "grad_norm": 1.7754042148590088, "learning_rate": 9.914059861855138e-06, "loss": 0.8957, "step": 2473 }, { "epoch": 0.0873861423733603, "grad_norm": 1.8324110507965088, "learning_rate": 9.91395423130338e-06, "loss": 0.8987, "step": 2474 }, { "epoch": 0.0874214641770682, "grad_norm": 1.8745312690734863, "learning_rate": 9.913848536438711e-06, "loss": 0.9209, "step": 2475 }, { "epoch": 0.08745678598077611, "grad_norm": 1.9787557125091553, "learning_rate": 9.913742777262518e-06, "loss": 0.9272, "step": 2476 }, { "epoch": 0.08749210778448402, "grad_norm": 1.7751103639602661, "learning_rate": 9.913636953776183e-06, "loss": 0.8998, "step": 2477 }, { "epoch": 0.08752742958819192, "grad_norm": 1.762455701828003, "learning_rate": 9.91353106598109e-06, "loss": 0.8988, "step": 2478 }, { "epoch": 0.08756275139189983, "grad_norm": 1.768151879310608, "learning_rate": 9.913425113878627e-06, "loss": 0.8977, "step": 2479 }, { "epoch": 0.08759807319560774, "grad_norm": 2.005368232727051, "learning_rate": 9.91331909747018e-06, "loss": 0.909, "step": 2480 }, { "epoch": 0.08763339499931563, "grad_norm": 1.9334025382995605, "learning_rate": 9.913213016757135e-06, "loss": 0.9276, "step": 2481 }, { "epoch": 0.08766871680302354, "grad_norm": 1.8907455205917358, "learning_rate": 9.913106871740883e-06, "loss": 0.9061, "step": 2482 }, { "epoch": 0.08770403860673145, "grad_norm": 1.8857709169387817, "learning_rate": 9.91300066242281e-06, "loss": 0.8934, "step": 2483 }, { "epoch": 0.08773936041043937, "grad_norm": 1.807093858718872, "learning_rate": 9.912894388804311e-06, "loss": 0.8964, "step": 2484 }, { "epoch": 0.08777468221414726, "grad_norm": 1.6722157001495361, "learning_rate": 9.91278805088677e-06, "loss": 0.8899, "step": 2485 }, { "epoch": 0.08781000401785517, "grad_norm": 1.945876955986023, "learning_rate": 9.912681648671586e-06, "loss": 0.8727, "step": 2486 }, { "epoch": 0.08784532582156308, "grad_norm": 1.854512095451355, "learning_rate": 9.912575182160145e-06, "loss": 0.9432, "step": 2487 }, { "epoch": 0.08788064762527098, "grad_norm": 1.711374044418335, "learning_rate": 9.912468651353847e-06, "loss": 0.9327, "step": 2488 }, { "epoch": 0.08791596942897889, "grad_norm": 2.04267954826355, "learning_rate": 9.912362056254078e-06, "loss": 0.8798, "step": 2489 }, { "epoch": 0.0879512912326868, "grad_norm": 1.774778127670288, "learning_rate": 9.912255396862241e-06, "loss": 0.9187, "step": 2490 }, { "epoch": 0.0879866130363947, "grad_norm": 1.6778156757354736, "learning_rate": 9.912148673179729e-06, "loss": 0.9163, "step": 2491 }, { "epoch": 0.08802193484010261, "grad_norm": 1.8023875951766968, "learning_rate": 9.912041885207938e-06, "loss": 0.9467, "step": 2492 }, { "epoch": 0.08805725664381052, "grad_norm": 1.6178127527236938, "learning_rate": 9.911935032948265e-06, "loss": 0.8965, "step": 2493 }, { "epoch": 0.08809257844751842, "grad_norm": 2.0213191509246826, "learning_rate": 9.91182811640211e-06, "loss": 0.9518, "step": 2494 }, { "epoch": 0.08812790025122633, "grad_norm": 1.971821665763855, "learning_rate": 9.911721135570874e-06, "loss": 0.8889, "step": 2495 }, { "epoch": 0.08816322205493424, "grad_norm": 1.678253173828125, "learning_rate": 9.911614090455953e-06, "loss": 0.8577, "step": 2496 }, { "epoch": 0.08819854385864215, "grad_norm": 1.7743531465530396, "learning_rate": 9.911506981058752e-06, "loss": 0.9035, "step": 2497 }, { "epoch": 0.08823386566235004, "grad_norm": 1.8427739143371582, "learning_rate": 9.911399807380669e-06, "loss": 0.9044, "step": 2498 }, { "epoch": 0.08826918746605796, "grad_norm": 1.7269545793533325, "learning_rate": 9.911292569423109e-06, "loss": 0.9217, "step": 2499 }, { "epoch": 0.08830450926976587, "grad_norm": 1.7282506227493286, "learning_rate": 9.911185267187475e-06, "loss": 0.9202, "step": 2500 }, { "epoch": 0.08833983107347376, "grad_norm": 1.8660296201705933, "learning_rate": 9.911077900675172e-06, "loss": 0.8931, "step": 2501 }, { "epoch": 0.08837515287718167, "grad_norm": 1.961847186088562, "learning_rate": 9.910970469887604e-06, "loss": 0.9069, "step": 2502 }, { "epoch": 0.08841047468088958, "grad_norm": 1.9452394247055054, "learning_rate": 9.910862974826178e-06, "loss": 0.9187, "step": 2503 }, { "epoch": 0.08844579648459748, "grad_norm": 1.793247938156128, "learning_rate": 9.9107554154923e-06, "loss": 0.9233, "step": 2504 }, { "epoch": 0.08848111828830539, "grad_norm": 1.856652021408081, "learning_rate": 9.910647791887379e-06, "loss": 0.913, "step": 2505 }, { "epoch": 0.0885164400920133, "grad_norm": 1.7120081186294556, "learning_rate": 9.910540104012822e-06, "loss": 0.9065, "step": 2506 }, { "epoch": 0.0885517618957212, "grad_norm": 1.9204837083816528, "learning_rate": 9.910432351870041e-06, "loss": 0.9512, "step": 2507 }, { "epoch": 0.08858708369942911, "grad_norm": 1.9725531339645386, "learning_rate": 9.910324535460442e-06, "loss": 0.9293, "step": 2508 }, { "epoch": 0.08862240550313702, "grad_norm": 1.8561724424362183, "learning_rate": 9.91021665478544e-06, "loss": 0.8975, "step": 2509 }, { "epoch": 0.08865772730684493, "grad_norm": 1.889262080192566, "learning_rate": 9.910108709846445e-06, "loss": 0.9018, "step": 2510 }, { "epoch": 0.08869304911055283, "grad_norm": 2.2613892555236816, "learning_rate": 9.910000700644871e-06, "loss": 0.917, "step": 2511 }, { "epoch": 0.08872837091426074, "grad_norm": 1.8043792247772217, "learning_rate": 9.90989262718213e-06, "loss": 0.919, "step": 2512 }, { "epoch": 0.08876369271796865, "grad_norm": 1.7204203605651855, "learning_rate": 9.909784489459636e-06, "loss": 0.8943, "step": 2513 }, { "epoch": 0.08879901452167654, "grad_norm": 1.9971532821655273, "learning_rate": 9.909676287478807e-06, "loss": 0.8978, "step": 2514 }, { "epoch": 0.08883433632538446, "grad_norm": 1.7815377712249756, "learning_rate": 9.909568021241059e-06, "loss": 0.914, "step": 2515 }, { "epoch": 0.08886965812909237, "grad_norm": 1.855513572692871, "learning_rate": 9.909459690747805e-06, "loss": 0.894, "step": 2516 }, { "epoch": 0.08890497993280026, "grad_norm": 1.8423435688018799, "learning_rate": 9.909351296000467e-06, "loss": 0.8695, "step": 2517 }, { "epoch": 0.08894030173650817, "grad_norm": 1.8233789205551147, "learning_rate": 9.909242837000462e-06, "loss": 0.9017, "step": 2518 }, { "epoch": 0.08897562354021608, "grad_norm": 1.6477750539779663, "learning_rate": 9.909134313749209e-06, "loss": 0.8469, "step": 2519 }, { "epoch": 0.08901094534392398, "grad_norm": 2.5883779525756836, "learning_rate": 9.909025726248129e-06, "loss": 0.8882, "step": 2520 }, { "epoch": 0.08904626714763189, "grad_norm": 1.8075356483459473, "learning_rate": 9.908917074498642e-06, "loss": 0.9283, "step": 2521 }, { "epoch": 0.0890815889513398, "grad_norm": 1.7825089693069458, "learning_rate": 9.908808358502173e-06, "loss": 0.907, "step": 2522 }, { "epoch": 0.08911691075504771, "grad_norm": 1.830851674079895, "learning_rate": 9.908699578260143e-06, "loss": 0.8969, "step": 2523 }, { "epoch": 0.08915223255875561, "grad_norm": 1.863176941871643, "learning_rate": 9.908590733773974e-06, "loss": 0.9026, "step": 2524 }, { "epoch": 0.08918755436246352, "grad_norm": 1.9264262914657593, "learning_rate": 9.908481825045095e-06, "loss": 0.9145, "step": 2525 }, { "epoch": 0.08922287616617143, "grad_norm": 1.9940599203109741, "learning_rate": 9.908372852074926e-06, "loss": 0.9013, "step": 2526 }, { "epoch": 0.08925819796987933, "grad_norm": 1.914190649986267, "learning_rate": 9.908263814864898e-06, "loss": 0.8884, "step": 2527 }, { "epoch": 0.08929351977358724, "grad_norm": 2.04294490814209, "learning_rate": 9.908154713416434e-06, "loss": 0.8782, "step": 2528 }, { "epoch": 0.08932884157729515, "grad_norm": 1.947410225868225, "learning_rate": 9.908045547730964e-06, "loss": 0.9098, "step": 2529 }, { "epoch": 0.08936416338100305, "grad_norm": 1.8254623413085938, "learning_rate": 9.907936317809917e-06, "loss": 0.868, "step": 2530 }, { "epoch": 0.08939948518471096, "grad_norm": 1.7575652599334717, "learning_rate": 9.90782702365472e-06, "loss": 0.9062, "step": 2531 }, { "epoch": 0.08943480698841887, "grad_norm": 2.08148193359375, "learning_rate": 9.907717665266807e-06, "loss": 0.9254, "step": 2532 }, { "epoch": 0.08947012879212678, "grad_norm": 2.0153732299804688, "learning_rate": 9.907608242647609e-06, "loss": 0.9242, "step": 2533 }, { "epoch": 0.08950545059583467, "grad_norm": 2.178276777267456, "learning_rate": 9.907498755798555e-06, "loss": 0.9121, "step": 2534 }, { "epoch": 0.08954077239954258, "grad_norm": 1.9466578960418701, "learning_rate": 9.907389204721081e-06, "loss": 0.9289, "step": 2535 }, { "epoch": 0.0895760942032505, "grad_norm": 1.8950294256210327, "learning_rate": 9.907279589416618e-06, "loss": 0.8968, "step": 2536 }, { "epoch": 0.08961141600695839, "grad_norm": 1.8236677646636963, "learning_rate": 9.907169909886604e-06, "loss": 0.9053, "step": 2537 }, { "epoch": 0.0896467378106663, "grad_norm": 1.820882797241211, "learning_rate": 9.907060166132473e-06, "loss": 0.9069, "step": 2538 }, { "epoch": 0.08968205961437421, "grad_norm": 1.8988077640533447, "learning_rate": 9.90695035815566e-06, "loss": 0.9087, "step": 2539 }, { "epoch": 0.08971738141808211, "grad_norm": 2.1378204822540283, "learning_rate": 9.906840485957602e-06, "loss": 0.9159, "step": 2540 }, { "epoch": 0.08975270322179002, "grad_norm": 1.8261243104934692, "learning_rate": 9.90673054953974e-06, "loss": 0.9109, "step": 2541 }, { "epoch": 0.08978802502549793, "grad_norm": 1.933684229850769, "learning_rate": 9.906620548903509e-06, "loss": 0.8953, "step": 2542 }, { "epoch": 0.08982334682920583, "grad_norm": 1.128161072731018, "learning_rate": 9.906510484050354e-06, "loss": 0.618, "step": 2543 }, { "epoch": 0.08985866863291374, "grad_norm": 2.122636079788208, "learning_rate": 9.906400354981709e-06, "loss": 0.9223, "step": 2544 }, { "epoch": 0.08989399043662165, "grad_norm": 1.9459296464920044, "learning_rate": 9.90629016169902e-06, "loss": 0.924, "step": 2545 }, { "epoch": 0.08992931224032956, "grad_norm": 2.3333024978637695, "learning_rate": 9.906179904203725e-06, "loss": 0.9575, "step": 2546 }, { "epoch": 0.08996463404403746, "grad_norm": 1.9643126726150513, "learning_rate": 9.906069582497273e-06, "loss": 0.9014, "step": 2547 }, { "epoch": 0.08999995584774537, "grad_norm": 1.7915465831756592, "learning_rate": 9.905959196581103e-06, "loss": 0.8965, "step": 2548 }, { "epoch": 0.09003527765145328, "grad_norm": 1.9300650358200073, "learning_rate": 9.90584874645666e-06, "loss": 0.9068, "step": 2549 }, { "epoch": 0.09007059945516117, "grad_norm": 1.898716688156128, "learning_rate": 9.905738232125394e-06, "loss": 0.917, "step": 2550 }, { "epoch": 0.09010592125886908, "grad_norm": 2.380345344543457, "learning_rate": 9.905627653588746e-06, "loss": 0.9048, "step": 2551 }, { "epoch": 0.090141243062577, "grad_norm": 1.820448398590088, "learning_rate": 9.905517010848166e-06, "loss": 0.8867, "step": 2552 }, { "epoch": 0.09017656486628489, "grad_norm": 1.9024964570999146, "learning_rate": 9.905406303905101e-06, "loss": 0.9126, "step": 2553 }, { "epoch": 0.0902118866699928, "grad_norm": 1.8943392038345337, "learning_rate": 9.905295532761e-06, "loss": 0.9142, "step": 2554 }, { "epoch": 0.09024720847370071, "grad_norm": 1.776167631149292, "learning_rate": 9.905184697417316e-06, "loss": 0.9055, "step": 2555 }, { "epoch": 0.09028253027740861, "grad_norm": 1.6900150775909424, "learning_rate": 9.905073797875492e-06, "loss": 0.8774, "step": 2556 }, { "epoch": 0.09031785208111652, "grad_norm": 1.9615623950958252, "learning_rate": 9.904962834136988e-06, "loss": 0.8849, "step": 2557 }, { "epoch": 0.09035317388482443, "grad_norm": 1.8011740446090698, "learning_rate": 9.904851806203252e-06, "loss": 0.9066, "step": 2558 }, { "epoch": 0.09038849568853234, "grad_norm": 1.7517848014831543, "learning_rate": 9.904740714075737e-06, "loss": 0.8795, "step": 2559 }, { "epoch": 0.09042381749224024, "grad_norm": 1.7937650680541992, "learning_rate": 9.904629557755898e-06, "loss": 0.9551, "step": 2560 }, { "epoch": 0.09045913929594815, "grad_norm": 1.6186096668243408, "learning_rate": 9.904518337245188e-06, "loss": 0.8527, "step": 2561 }, { "epoch": 0.09049446109965606, "grad_norm": 2.0409648418426514, "learning_rate": 9.904407052545064e-06, "loss": 0.8769, "step": 2562 }, { "epoch": 0.09052978290336396, "grad_norm": 1.8720027208328247, "learning_rate": 9.904295703656984e-06, "loss": 0.8917, "step": 2563 }, { "epoch": 0.09056510470707187, "grad_norm": 1.827854871749878, "learning_rate": 9.904184290582403e-06, "loss": 0.8779, "step": 2564 }, { "epoch": 0.09060042651077978, "grad_norm": 1.670790672302246, "learning_rate": 9.90407281332278e-06, "loss": 0.8927, "step": 2565 }, { "epoch": 0.09063574831448767, "grad_norm": 1.9856334924697876, "learning_rate": 9.903961271879574e-06, "loss": 0.8676, "step": 2566 }, { "epoch": 0.09067107011819558, "grad_norm": 1.8672560453414917, "learning_rate": 9.903849666254243e-06, "loss": 0.8896, "step": 2567 }, { "epoch": 0.0907063919219035, "grad_norm": 2.4963643550872803, "learning_rate": 9.90373799644825e-06, "loss": 0.928, "step": 2568 }, { "epoch": 0.09074171372561139, "grad_norm": 6.805139541625977, "learning_rate": 9.903626262463058e-06, "loss": 0.9012, "step": 2569 }, { "epoch": 0.0907770355293193, "grad_norm": 1.6889925003051758, "learning_rate": 9.903514464300125e-06, "loss": 0.8606, "step": 2570 }, { "epoch": 0.09081235733302721, "grad_norm": 2.0387532711029053, "learning_rate": 9.90340260196092e-06, "loss": 0.8895, "step": 2571 }, { "epoch": 0.09084767913673512, "grad_norm": 1.6746509075164795, "learning_rate": 9.9032906754469e-06, "loss": 0.9043, "step": 2572 }, { "epoch": 0.09088300094044302, "grad_norm": 1.6236519813537598, "learning_rate": 9.903178684759534e-06, "loss": 0.9329, "step": 2573 }, { "epoch": 0.09091832274415093, "grad_norm": 1.743453025817871, "learning_rate": 9.903066629900287e-06, "loss": 0.9069, "step": 2574 }, { "epoch": 0.09095364454785884, "grad_norm": 1.9481353759765625, "learning_rate": 9.902954510870626e-06, "loss": 0.8748, "step": 2575 }, { "epoch": 0.09098896635156674, "grad_norm": 1.8609859943389893, "learning_rate": 9.902842327672018e-06, "loss": 0.9255, "step": 2576 }, { "epoch": 0.09102428815527465, "grad_norm": 1.662338137626648, "learning_rate": 9.902730080305931e-06, "loss": 0.8801, "step": 2577 }, { "epoch": 0.09105960995898256, "grad_norm": 2.0880579948425293, "learning_rate": 9.902617768773834e-06, "loss": 0.9263, "step": 2578 }, { "epoch": 0.09109493176269046, "grad_norm": 1.5758848190307617, "learning_rate": 9.902505393077197e-06, "loss": 0.8805, "step": 2579 }, { "epoch": 0.09113025356639837, "grad_norm": 1.6835931539535522, "learning_rate": 9.902392953217492e-06, "loss": 0.6315, "step": 2580 }, { "epoch": 0.09116557537010628, "grad_norm": 1.9297033548355103, "learning_rate": 9.90228044919619e-06, "loss": 0.8833, "step": 2581 }, { "epoch": 0.09120089717381417, "grad_norm": 2.0083866119384766, "learning_rate": 9.902167881014762e-06, "loss": 0.9222, "step": 2582 }, { "epoch": 0.09123621897752208, "grad_norm": 2.0723283290863037, "learning_rate": 9.902055248674682e-06, "loss": 0.8722, "step": 2583 }, { "epoch": 0.09127154078123, "grad_norm": 1.8797963857650757, "learning_rate": 9.901942552177427e-06, "loss": 0.9192, "step": 2584 }, { "epoch": 0.0913068625849379, "grad_norm": 1.8732465505599976, "learning_rate": 9.901829791524466e-06, "loss": 0.9043, "step": 2585 }, { "epoch": 0.0913421843886458, "grad_norm": 1.8498536348342896, "learning_rate": 9.901716966717279e-06, "loss": 0.9046, "step": 2586 }, { "epoch": 0.09137750619235371, "grad_norm": 1.9009400606155396, "learning_rate": 9.901604077757343e-06, "loss": 0.928, "step": 2587 }, { "epoch": 0.09141282799606162, "grad_norm": 2.062833547592163, "learning_rate": 9.901491124646133e-06, "loss": 0.8935, "step": 2588 }, { "epoch": 0.09144814979976952, "grad_norm": 1.945237398147583, "learning_rate": 9.901378107385129e-06, "loss": 0.9023, "step": 2589 }, { "epoch": 0.09148347160347743, "grad_norm": 1.9462496042251587, "learning_rate": 9.90126502597581e-06, "loss": 0.9632, "step": 2590 }, { "epoch": 0.09151879340718534, "grad_norm": 1.8272744417190552, "learning_rate": 9.901151880419654e-06, "loss": 0.9289, "step": 2591 }, { "epoch": 0.09155411521089324, "grad_norm": 1.8412829637527466, "learning_rate": 9.901038670718146e-06, "loss": 0.8718, "step": 2592 }, { "epoch": 0.09158943701460115, "grad_norm": 1.822526216506958, "learning_rate": 9.900925396872763e-06, "loss": 0.8949, "step": 2593 }, { "epoch": 0.09162475881830906, "grad_norm": 1.7806798219680786, "learning_rate": 9.900812058884992e-06, "loss": 0.8988, "step": 2594 }, { "epoch": 0.09166008062201696, "grad_norm": 1.6291313171386719, "learning_rate": 9.900698656756312e-06, "loss": 0.895, "step": 2595 }, { "epoch": 0.09169540242572487, "grad_norm": 1.8149076700210571, "learning_rate": 9.90058519048821e-06, "loss": 0.8966, "step": 2596 }, { "epoch": 0.09173072422943278, "grad_norm": 1.765407919883728, "learning_rate": 9.900471660082171e-06, "loss": 0.8954, "step": 2597 }, { "epoch": 0.09176604603314069, "grad_norm": 1.6796807050704956, "learning_rate": 9.900358065539679e-06, "loss": 0.9083, "step": 2598 }, { "epoch": 0.09180136783684859, "grad_norm": 1.7765374183654785, "learning_rate": 9.900244406862221e-06, "loss": 0.8616, "step": 2599 }, { "epoch": 0.0918366896405565, "grad_norm": 1.7909626960754395, "learning_rate": 9.900130684051286e-06, "loss": 0.9513, "step": 2600 }, { "epoch": 0.0918720114442644, "grad_norm": 1.9528170824050903, "learning_rate": 9.900016897108363e-06, "loss": 0.8804, "step": 2601 }, { "epoch": 0.0919073332479723, "grad_norm": 1.745252013206482, "learning_rate": 9.89990304603494e-06, "loss": 0.896, "step": 2602 }, { "epoch": 0.09194265505168021, "grad_norm": 1.833670735359192, "learning_rate": 9.899789130832505e-06, "loss": 0.8753, "step": 2603 }, { "epoch": 0.09197797685538812, "grad_norm": 2.227205276489258, "learning_rate": 9.899675151502552e-06, "loss": 0.9003, "step": 2604 }, { "epoch": 0.09201329865909602, "grad_norm": 1.817020058631897, "learning_rate": 9.899561108046572e-06, "loss": 0.8626, "step": 2605 }, { "epoch": 0.09204862046280393, "grad_norm": 1.7801874876022339, "learning_rate": 9.899447000466057e-06, "loss": 0.8805, "step": 2606 }, { "epoch": 0.09208394226651184, "grad_norm": 1.8072055578231812, "learning_rate": 9.8993328287625e-06, "loss": 0.8636, "step": 2607 }, { "epoch": 0.09211926407021974, "grad_norm": 1.835891842842102, "learning_rate": 9.899218592937396e-06, "loss": 0.871, "step": 2608 }, { "epoch": 0.09215458587392765, "grad_norm": 1.7788777351379395, "learning_rate": 9.899104292992241e-06, "loss": 0.8748, "step": 2609 }, { "epoch": 0.09218990767763556, "grad_norm": 2.0929770469665527, "learning_rate": 9.89898992892853e-06, "loss": 0.8793, "step": 2610 }, { "epoch": 0.09222522948134347, "grad_norm": 1.7196335792541504, "learning_rate": 9.89887550074776e-06, "loss": 0.8562, "step": 2611 }, { "epoch": 0.09226055128505137, "grad_norm": 1.2336076498031616, "learning_rate": 9.898761008451428e-06, "loss": 0.609, "step": 2612 }, { "epoch": 0.09229587308875928, "grad_norm": 2.1132264137268066, "learning_rate": 9.898646452041031e-06, "loss": 0.892, "step": 2613 }, { "epoch": 0.09233119489246719, "grad_norm": 2.1090481281280518, "learning_rate": 9.898531831518072e-06, "loss": 0.907, "step": 2614 }, { "epoch": 0.09236651669617509, "grad_norm": 2.2255477905273438, "learning_rate": 9.898417146884048e-06, "loss": 0.8784, "step": 2615 }, { "epoch": 0.092401838499883, "grad_norm": 0.9694473743438721, "learning_rate": 9.898302398140461e-06, "loss": 0.585, "step": 2616 }, { "epoch": 0.0924371603035909, "grad_norm": 2.3279807567596436, "learning_rate": 9.898187585288815e-06, "loss": 0.9212, "step": 2617 }, { "epoch": 0.0924724821072988, "grad_norm": 2.735949754714966, "learning_rate": 9.898072708330609e-06, "loss": 0.8892, "step": 2618 }, { "epoch": 0.09250780391100671, "grad_norm": 2.6613852977752686, "learning_rate": 9.897957767267348e-06, "loss": 0.9121, "step": 2619 }, { "epoch": 0.09254312571471462, "grad_norm": 1.987533688545227, "learning_rate": 9.897842762100537e-06, "loss": 0.8572, "step": 2620 }, { "epoch": 0.09257844751842252, "grad_norm": 2.33990740776062, "learning_rate": 9.89772769283168e-06, "loss": 0.8858, "step": 2621 }, { "epoch": 0.09261376932213043, "grad_norm": 2.053786039352417, "learning_rate": 9.897612559462285e-06, "loss": 0.9236, "step": 2622 }, { "epoch": 0.09264909112583834, "grad_norm": 1.7526780366897583, "learning_rate": 9.897497361993858e-06, "loss": 0.8802, "step": 2623 }, { "epoch": 0.09268441292954625, "grad_norm": 1.9845716953277588, "learning_rate": 9.897382100427904e-06, "loss": 0.8757, "step": 2624 }, { "epoch": 0.09271973473325415, "grad_norm": 1.7972990274429321, "learning_rate": 9.897266774765934e-06, "loss": 0.9213, "step": 2625 }, { "epoch": 0.09275505653696206, "grad_norm": 1.7463432550430298, "learning_rate": 9.897151385009458e-06, "loss": 0.8733, "step": 2626 }, { "epoch": 0.09279037834066997, "grad_norm": 1.7705744504928589, "learning_rate": 9.897035931159984e-06, "loss": 0.8987, "step": 2627 }, { "epoch": 0.09282570014437787, "grad_norm": 2.006330728530884, "learning_rate": 9.896920413219026e-06, "loss": 0.8846, "step": 2628 }, { "epoch": 0.09286102194808578, "grad_norm": 1.9573181867599487, "learning_rate": 9.896804831188092e-06, "loss": 0.9515, "step": 2629 }, { "epoch": 0.09289634375179369, "grad_norm": 1.8307440280914307, "learning_rate": 9.896689185068699e-06, "loss": 0.8532, "step": 2630 }, { "epoch": 0.09293166555550159, "grad_norm": 1.7105096578598022, "learning_rate": 9.896573474862358e-06, "loss": 0.9045, "step": 2631 }, { "epoch": 0.0929669873592095, "grad_norm": 1.7129831314086914, "learning_rate": 9.896457700570583e-06, "loss": 0.9177, "step": 2632 }, { "epoch": 0.09300230916291741, "grad_norm": 1.8448196649551392, "learning_rate": 9.89634186219489e-06, "loss": 0.8943, "step": 2633 }, { "epoch": 0.09303763096662532, "grad_norm": 1.818848729133606, "learning_rate": 9.896225959736797e-06, "loss": 0.8918, "step": 2634 }, { "epoch": 0.09307295277033321, "grad_norm": 1.986699104309082, "learning_rate": 9.896109993197816e-06, "loss": 0.9105, "step": 2635 }, { "epoch": 0.09310827457404112, "grad_norm": 1.8054001331329346, "learning_rate": 9.89599396257947e-06, "loss": 0.8906, "step": 2636 }, { "epoch": 0.09314359637774904, "grad_norm": 1.8585774898529053, "learning_rate": 9.895877867883275e-06, "loss": 0.8974, "step": 2637 }, { "epoch": 0.09317891818145693, "grad_norm": 1.934727668762207, "learning_rate": 9.89576170911075e-06, "loss": 0.8703, "step": 2638 }, { "epoch": 0.09321423998516484, "grad_norm": 2.0881524085998535, "learning_rate": 9.895645486263416e-06, "loss": 0.8786, "step": 2639 }, { "epoch": 0.09324956178887275, "grad_norm": 2.028766632080078, "learning_rate": 9.895529199342795e-06, "loss": 0.925, "step": 2640 }, { "epoch": 0.09328488359258065, "grad_norm": 2.0458226203918457, "learning_rate": 9.895412848350407e-06, "loss": 0.8949, "step": 2641 }, { "epoch": 0.09332020539628856, "grad_norm": 1.6700199842453003, "learning_rate": 9.895296433287777e-06, "loss": 0.9124, "step": 2642 }, { "epoch": 0.09335552719999647, "grad_norm": 1.7471026182174683, "learning_rate": 9.895179954156427e-06, "loss": 0.8779, "step": 2643 }, { "epoch": 0.09339084900370437, "grad_norm": 1.9137409925460815, "learning_rate": 9.895063410957882e-06, "loss": 0.8964, "step": 2644 }, { "epoch": 0.09342617080741228, "grad_norm": 1.74927818775177, "learning_rate": 9.894946803693668e-06, "loss": 0.8828, "step": 2645 }, { "epoch": 0.09346149261112019, "grad_norm": 1.6953282356262207, "learning_rate": 9.89483013236531e-06, "loss": 0.907, "step": 2646 }, { "epoch": 0.0934968144148281, "grad_norm": 1.7986867427825928, "learning_rate": 9.894713396974335e-06, "loss": 0.8734, "step": 2647 }, { "epoch": 0.093532136218536, "grad_norm": 1.7940071821212769, "learning_rate": 9.894596597522272e-06, "loss": 0.9005, "step": 2648 }, { "epoch": 0.09356745802224391, "grad_norm": 1.7925039529800415, "learning_rate": 9.894479734010647e-06, "loss": 0.8548, "step": 2649 }, { "epoch": 0.09360277982595182, "grad_norm": 1.7292238473892212, "learning_rate": 9.894362806440993e-06, "loss": 0.8823, "step": 2650 }, { "epoch": 0.09363810162965971, "grad_norm": 1.8477559089660645, "learning_rate": 9.894245814814839e-06, "loss": 0.8787, "step": 2651 }, { "epoch": 0.09367342343336763, "grad_norm": 1.9551148414611816, "learning_rate": 9.894128759133713e-06, "loss": 0.908, "step": 2652 }, { "epoch": 0.09370874523707554, "grad_norm": 1.7028610706329346, "learning_rate": 9.894011639399153e-06, "loss": 0.8892, "step": 2653 }, { "epoch": 0.09374406704078343, "grad_norm": 1.7827763557434082, "learning_rate": 9.893894455612685e-06, "loss": 0.8849, "step": 2654 }, { "epoch": 0.09377938884449134, "grad_norm": 1.9103341102600098, "learning_rate": 9.89377720777585e-06, "loss": 0.9565, "step": 2655 }, { "epoch": 0.09381471064819925, "grad_norm": 1.683009386062622, "learning_rate": 9.893659895890178e-06, "loss": 0.9102, "step": 2656 }, { "epoch": 0.09385003245190715, "grad_norm": 1.9235939979553223, "learning_rate": 9.893542519957205e-06, "loss": 0.895, "step": 2657 }, { "epoch": 0.09388535425561506, "grad_norm": 1.802773356437683, "learning_rate": 9.893425079978465e-06, "loss": 0.8962, "step": 2658 }, { "epoch": 0.09392067605932297, "grad_norm": 1.7453298568725586, "learning_rate": 9.8933075759555e-06, "loss": 0.8807, "step": 2659 }, { "epoch": 0.09395599786303088, "grad_norm": 1.90660560131073, "learning_rate": 9.893190007889844e-06, "loss": 0.9082, "step": 2660 }, { "epoch": 0.09399131966673878, "grad_norm": 1.6730895042419434, "learning_rate": 9.893072375783038e-06, "loss": 0.8819, "step": 2661 }, { "epoch": 0.09402664147044669, "grad_norm": 1.9079930782318115, "learning_rate": 9.89295467963662e-06, "loss": 0.9008, "step": 2662 }, { "epoch": 0.0940619632741546, "grad_norm": 1.6999913454055786, "learning_rate": 9.892836919452131e-06, "loss": 0.8905, "step": 2663 }, { "epoch": 0.0940972850778625, "grad_norm": 1.8331400156021118, "learning_rate": 9.892719095231114e-06, "loss": 0.8531, "step": 2664 }, { "epoch": 0.09413260688157041, "grad_norm": 1.9923985004425049, "learning_rate": 9.892601206975106e-06, "loss": 0.8816, "step": 2665 }, { "epoch": 0.09416792868527832, "grad_norm": 1.9073855876922607, "learning_rate": 9.892483254685654e-06, "loss": 0.8848, "step": 2666 }, { "epoch": 0.09420325048898621, "grad_norm": 1.7816072702407837, "learning_rate": 9.892365238364304e-06, "loss": 0.8905, "step": 2667 }, { "epoch": 0.09423857229269413, "grad_norm": 1.7910032272338867, "learning_rate": 9.892247158012594e-06, "loss": 0.8648, "step": 2668 }, { "epoch": 0.09427389409640204, "grad_norm": 2.0875117778778076, "learning_rate": 9.892129013632073e-06, "loss": 0.8963, "step": 2669 }, { "epoch": 0.09430921590010993, "grad_norm": 1.7691152095794678, "learning_rate": 9.892010805224289e-06, "loss": 0.8755, "step": 2670 }, { "epoch": 0.09434453770381784, "grad_norm": 1.8378931283950806, "learning_rate": 9.891892532790785e-06, "loss": 0.8747, "step": 2671 }, { "epoch": 0.09437985950752575, "grad_norm": 1.8259683847427368, "learning_rate": 9.891774196333113e-06, "loss": 0.8811, "step": 2672 }, { "epoch": 0.09441518131123366, "grad_norm": 1.658488392829895, "learning_rate": 9.891655795852819e-06, "loss": 0.8646, "step": 2673 }, { "epoch": 0.09445050311494156, "grad_norm": 1.8103313446044922, "learning_rate": 9.891537331351454e-06, "loss": 0.8886, "step": 2674 }, { "epoch": 0.09448582491864947, "grad_norm": 1.7039954662322998, "learning_rate": 9.891418802830566e-06, "loss": 0.8747, "step": 2675 }, { "epoch": 0.09452114672235738, "grad_norm": 2.6381120681762695, "learning_rate": 9.89130021029171e-06, "loss": 0.9392, "step": 2676 }, { "epoch": 0.09455646852606528, "grad_norm": 1.738592505455017, "learning_rate": 9.891181553736435e-06, "loss": 0.8637, "step": 2677 }, { "epoch": 0.09459179032977319, "grad_norm": 1.7406787872314453, "learning_rate": 9.891062833166297e-06, "loss": 0.9046, "step": 2678 }, { "epoch": 0.0946271121334811, "grad_norm": 1.7617518901824951, "learning_rate": 9.890944048582846e-06, "loss": 0.9134, "step": 2679 }, { "epoch": 0.094662433937189, "grad_norm": 1.6280401945114136, "learning_rate": 9.89082519998764e-06, "loss": 0.8431, "step": 2680 }, { "epoch": 0.09469775574089691, "grad_norm": 1.6631097793579102, "learning_rate": 9.890706287382233e-06, "loss": 0.8893, "step": 2681 }, { "epoch": 0.09473307754460482, "grad_norm": 1.9815171957015991, "learning_rate": 9.890587310768182e-06, "loss": 0.938, "step": 2682 }, { "epoch": 0.09476839934831272, "grad_norm": 2.8152685165405273, "learning_rate": 9.890468270147042e-06, "loss": 0.9183, "step": 2683 }, { "epoch": 0.09480372115202063, "grad_norm": 1.8266161680221558, "learning_rate": 9.890349165520375e-06, "loss": 0.9294, "step": 2684 }, { "epoch": 0.09483904295572854, "grad_norm": 1.8115882873535156, "learning_rate": 9.890229996889735e-06, "loss": 0.9314, "step": 2685 }, { "epoch": 0.09487436475943645, "grad_norm": 1.7761335372924805, "learning_rate": 9.890110764256686e-06, "loss": 0.898, "step": 2686 }, { "epoch": 0.09490968656314434, "grad_norm": 1.8379387855529785, "learning_rate": 9.889991467622786e-06, "loss": 0.8789, "step": 2687 }, { "epoch": 0.09494500836685225, "grad_norm": 2.6805667877197266, "learning_rate": 9.889872106989596e-06, "loss": 0.9169, "step": 2688 }, { "epoch": 0.09498033017056016, "grad_norm": 1.6923226118087769, "learning_rate": 9.88975268235868e-06, "loss": 0.8985, "step": 2689 }, { "epoch": 0.09501565197426806, "grad_norm": 1.8259801864624023, "learning_rate": 9.8896331937316e-06, "loss": 0.8826, "step": 2690 }, { "epoch": 0.09505097377797597, "grad_norm": 1.8920791149139404, "learning_rate": 9.889513641109918e-06, "loss": 0.8927, "step": 2691 }, { "epoch": 0.09508629558168388, "grad_norm": 1.7835071086883545, "learning_rate": 9.889394024495203e-06, "loss": 0.8844, "step": 2692 }, { "epoch": 0.09512161738539178, "grad_norm": 1.6674610376358032, "learning_rate": 9.889274343889016e-06, "loss": 0.8602, "step": 2693 }, { "epoch": 0.09515693918909969, "grad_norm": 2.1040189266204834, "learning_rate": 9.889154599292928e-06, "loss": 0.898, "step": 2694 }, { "epoch": 0.0951922609928076, "grad_norm": 1.7725110054016113, "learning_rate": 9.889034790708503e-06, "loss": 0.8862, "step": 2695 }, { "epoch": 0.0952275827965155, "grad_norm": 1.9684290885925293, "learning_rate": 9.888914918137309e-06, "loss": 0.9093, "step": 2696 }, { "epoch": 0.09526290460022341, "grad_norm": 2.1025936603546143, "learning_rate": 9.888794981580915e-06, "loss": 0.8999, "step": 2697 }, { "epoch": 0.09529822640393132, "grad_norm": 2.041752576828003, "learning_rate": 9.888674981040893e-06, "loss": 0.9494, "step": 2698 }, { "epoch": 0.09533354820763923, "grad_norm": 1.8786650896072388, "learning_rate": 9.88855491651881e-06, "loss": 0.8958, "step": 2699 }, { "epoch": 0.09536887001134713, "grad_norm": 1.8314136266708374, "learning_rate": 9.888434788016241e-06, "loss": 0.8945, "step": 2700 }, { "epoch": 0.09540419181505504, "grad_norm": 2.0800533294677734, "learning_rate": 9.888314595534755e-06, "loss": 0.8894, "step": 2701 }, { "epoch": 0.09543951361876295, "grad_norm": 2.0880980491638184, "learning_rate": 9.888194339075927e-06, "loss": 0.8945, "step": 2702 }, { "epoch": 0.09547483542247084, "grad_norm": 1.8650741577148438, "learning_rate": 9.88807401864133e-06, "loss": 0.8854, "step": 2703 }, { "epoch": 0.09551015722617875, "grad_norm": 1.7992868423461914, "learning_rate": 9.88795363423254e-06, "loss": 0.875, "step": 2704 }, { "epoch": 0.09554547902988667, "grad_norm": 1.7991535663604736, "learning_rate": 9.88783318585113e-06, "loss": 0.9134, "step": 2705 }, { "epoch": 0.09558080083359456, "grad_norm": 1.4451725482940674, "learning_rate": 9.88771267349868e-06, "loss": 0.6287, "step": 2706 }, { "epoch": 0.09561612263730247, "grad_norm": 2.0907013416290283, "learning_rate": 9.887592097176764e-06, "loss": 0.8618, "step": 2707 }, { "epoch": 0.09565144444101038, "grad_norm": 1.9507098197937012, "learning_rate": 9.887471456886962e-06, "loss": 0.9144, "step": 2708 }, { "epoch": 0.09568676624471828, "grad_norm": 1.7531070709228516, "learning_rate": 9.887350752630854e-06, "loss": 0.8649, "step": 2709 }, { "epoch": 0.09572208804842619, "grad_norm": 2.1151983737945557, "learning_rate": 9.887229984410014e-06, "loss": 0.8627, "step": 2710 }, { "epoch": 0.0957574098521341, "grad_norm": 2.2410905361175537, "learning_rate": 9.88710915222603e-06, "loss": 0.8897, "step": 2711 }, { "epoch": 0.09579273165584201, "grad_norm": 2.5239408016204834, "learning_rate": 9.886988256080478e-06, "loss": 0.9271, "step": 2712 }, { "epoch": 0.09582805345954991, "grad_norm": 2.3135783672332764, "learning_rate": 9.886867295974945e-06, "loss": 0.8996, "step": 2713 }, { "epoch": 0.09586337526325782, "grad_norm": 2.115602493286133, "learning_rate": 9.886746271911009e-06, "loss": 0.9023, "step": 2714 }, { "epoch": 0.09589869706696573, "grad_norm": 2.0047545433044434, "learning_rate": 9.886625183890258e-06, "loss": 0.8923, "step": 2715 }, { "epoch": 0.09593401887067363, "grad_norm": 1.8100897073745728, "learning_rate": 9.886504031914274e-06, "loss": 0.8921, "step": 2716 }, { "epoch": 0.09596934067438154, "grad_norm": 2.5602235794067383, "learning_rate": 9.886382815984643e-06, "loss": 0.8587, "step": 2717 }, { "epoch": 0.09600466247808945, "grad_norm": 2.0201706886291504, "learning_rate": 9.886261536102955e-06, "loss": 0.8864, "step": 2718 }, { "epoch": 0.09603998428179734, "grad_norm": 1.2185481786727905, "learning_rate": 9.886140192270792e-06, "loss": 0.638, "step": 2719 }, { "epoch": 0.09607530608550525, "grad_norm": 2.065774917602539, "learning_rate": 9.886018784489746e-06, "loss": 0.94, "step": 2720 }, { "epoch": 0.09611062788921317, "grad_norm": 2.156707763671875, "learning_rate": 9.885897312761403e-06, "loss": 0.9373, "step": 2721 }, { "epoch": 0.09614594969292106, "grad_norm": 1.9182536602020264, "learning_rate": 9.885775777087356e-06, "loss": 0.8451, "step": 2722 }, { "epoch": 0.09618127149662897, "grad_norm": 2.140547513961792, "learning_rate": 9.885654177469195e-06, "loss": 0.8906, "step": 2723 }, { "epoch": 0.09621659330033688, "grad_norm": 2.159660577774048, "learning_rate": 9.885532513908509e-06, "loss": 0.9107, "step": 2724 }, { "epoch": 0.0962519151040448, "grad_norm": 1.8949187994003296, "learning_rate": 9.885410786406892e-06, "loss": 0.8868, "step": 2725 }, { "epoch": 0.09628723690775269, "grad_norm": 1.8721283674240112, "learning_rate": 9.885288994965937e-06, "loss": 0.921, "step": 2726 }, { "epoch": 0.0963225587114606, "grad_norm": 2.470515489578247, "learning_rate": 9.885167139587238e-06, "loss": 0.9144, "step": 2727 }, { "epoch": 0.09635788051516851, "grad_norm": 1.9099234342575073, "learning_rate": 9.88504522027239e-06, "loss": 0.9208, "step": 2728 }, { "epoch": 0.09639320231887641, "grad_norm": 1.8547662496566772, "learning_rate": 9.884923237022987e-06, "loss": 0.9278, "step": 2729 }, { "epoch": 0.09642852412258432, "grad_norm": 1.9163445234298706, "learning_rate": 9.884801189840629e-06, "loss": 0.9178, "step": 2730 }, { "epoch": 0.09646384592629223, "grad_norm": 1.7591756582260132, "learning_rate": 9.884679078726911e-06, "loss": 0.8615, "step": 2731 }, { "epoch": 0.09649916773000013, "grad_norm": 1.9724297523498535, "learning_rate": 9.88455690368343e-06, "loss": 0.8935, "step": 2732 }, { "epoch": 0.09653448953370804, "grad_norm": 1.8290963172912598, "learning_rate": 9.884434664711788e-06, "loss": 0.8758, "step": 2733 }, { "epoch": 0.09656981133741595, "grad_norm": 1.8948808908462524, "learning_rate": 9.884312361813583e-06, "loss": 0.9108, "step": 2734 }, { "epoch": 0.09660513314112386, "grad_norm": 1.7329295873641968, "learning_rate": 9.884189994990415e-06, "loss": 0.8987, "step": 2735 }, { "epoch": 0.09664045494483176, "grad_norm": 1.8909640312194824, "learning_rate": 9.884067564243887e-06, "loss": 0.886, "step": 2736 }, { "epoch": 0.09667577674853967, "grad_norm": 1.7064286470413208, "learning_rate": 9.883945069575601e-06, "loss": 0.9326, "step": 2737 }, { "epoch": 0.09671109855224758, "grad_norm": 1.7015293836593628, "learning_rate": 9.883822510987159e-06, "loss": 0.9173, "step": 2738 }, { "epoch": 0.09674642035595547, "grad_norm": 1.7909018993377686, "learning_rate": 9.883699888480169e-06, "loss": 0.8736, "step": 2739 }, { "epoch": 0.09678174215966338, "grad_norm": 1.8761439323425293, "learning_rate": 9.88357720205623e-06, "loss": 0.8677, "step": 2740 }, { "epoch": 0.0968170639633713, "grad_norm": 1.7741326093673706, "learning_rate": 9.883454451716952e-06, "loss": 0.8847, "step": 2741 }, { "epoch": 0.09685238576707919, "grad_norm": 1.9362661838531494, "learning_rate": 9.883331637463939e-06, "loss": 0.8879, "step": 2742 }, { "epoch": 0.0968877075707871, "grad_norm": 1.615695595741272, "learning_rate": 9.883208759298798e-06, "loss": 0.8632, "step": 2743 }, { "epoch": 0.09692302937449501, "grad_norm": 1.9288604259490967, "learning_rate": 9.883085817223143e-06, "loss": 0.8722, "step": 2744 }, { "epoch": 0.09695835117820291, "grad_norm": 1.7500290870666504, "learning_rate": 9.882962811238576e-06, "loss": 0.8913, "step": 2745 }, { "epoch": 0.09699367298191082, "grad_norm": 1.6125895977020264, "learning_rate": 9.88283974134671e-06, "loss": 0.864, "step": 2746 }, { "epoch": 0.09702899478561873, "grad_norm": 1.985230803489685, "learning_rate": 9.882716607549157e-06, "loss": 0.8687, "step": 2747 }, { "epoch": 0.09706431658932664, "grad_norm": 1.856278657913208, "learning_rate": 9.882593409847524e-06, "loss": 0.9308, "step": 2748 }, { "epoch": 0.09709963839303454, "grad_norm": 1.9528746604919434, "learning_rate": 9.882470148243427e-06, "loss": 0.8765, "step": 2749 }, { "epoch": 0.09713496019674245, "grad_norm": 2.0185863971710205, "learning_rate": 9.88234682273848e-06, "loss": 0.9455, "step": 2750 }, { "epoch": 0.09717028200045036, "grad_norm": 1.7418121099472046, "learning_rate": 9.882223433334295e-06, "loss": 0.9021, "step": 2751 }, { "epoch": 0.09720560380415826, "grad_norm": 1.6518919467926025, "learning_rate": 9.882099980032487e-06, "loss": 0.9294, "step": 2752 }, { "epoch": 0.09724092560786617, "grad_norm": 1.6928462982177734, "learning_rate": 9.881976462834673e-06, "loss": 0.9123, "step": 2753 }, { "epoch": 0.09727624741157408, "grad_norm": 1.8821609020233154, "learning_rate": 9.881852881742467e-06, "loss": 0.8773, "step": 2754 }, { "epoch": 0.09731156921528197, "grad_norm": 2.3785388469696045, "learning_rate": 9.88172923675749e-06, "loss": 0.8799, "step": 2755 }, { "epoch": 0.09734689101898988, "grad_norm": 1.757778286933899, "learning_rate": 9.881605527881357e-06, "loss": 0.9251, "step": 2756 }, { "epoch": 0.0973822128226978, "grad_norm": 1.7625819444656372, "learning_rate": 9.88148175511569e-06, "loss": 0.877, "step": 2757 }, { "epoch": 0.09741753462640569, "grad_norm": 1.8417168855667114, "learning_rate": 9.881357918462106e-06, "loss": 0.9245, "step": 2758 }, { "epoch": 0.0974528564301136, "grad_norm": 1.7729946374893188, "learning_rate": 9.881234017922227e-06, "loss": 0.8727, "step": 2759 }, { "epoch": 0.09748817823382151, "grad_norm": 2.043790578842163, "learning_rate": 9.881110053497676e-06, "loss": 0.8818, "step": 2760 }, { "epoch": 0.09752350003752942, "grad_norm": 1.8514432907104492, "learning_rate": 9.880986025190073e-06, "loss": 0.8957, "step": 2761 }, { "epoch": 0.09755882184123732, "grad_norm": 1.7063864469528198, "learning_rate": 9.880861933001044e-06, "loss": 0.8942, "step": 2762 }, { "epoch": 0.09759414364494523, "grad_norm": 1.6197054386138916, "learning_rate": 9.88073777693221e-06, "loss": 0.8537, "step": 2763 }, { "epoch": 0.09762946544865314, "grad_norm": 2.1757357120513916, "learning_rate": 9.880613556985199e-06, "loss": 0.9308, "step": 2764 }, { "epoch": 0.09766478725236104, "grad_norm": 1.6555167436599731, "learning_rate": 9.880489273161633e-06, "loss": 0.8451, "step": 2765 }, { "epoch": 0.09770010905606895, "grad_norm": 1.7194637060165405, "learning_rate": 9.880364925463142e-06, "loss": 0.8968, "step": 2766 }, { "epoch": 0.09773543085977686, "grad_norm": 1.607836365699768, "learning_rate": 9.880240513891354e-06, "loss": 0.8909, "step": 2767 }, { "epoch": 0.09777075266348476, "grad_norm": 1.8530247211456299, "learning_rate": 9.880116038447894e-06, "loss": 0.8791, "step": 2768 }, { "epoch": 0.09780607446719267, "grad_norm": 1.7533692121505737, "learning_rate": 9.879991499134392e-06, "loss": 0.9293, "step": 2769 }, { "epoch": 0.09784139627090058, "grad_norm": 1.7308021783828735, "learning_rate": 9.879866895952478e-06, "loss": 0.8682, "step": 2770 }, { "epoch": 0.09787671807460847, "grad_norm": 1.8422523736953735, "learning_rate": 9.879742228903786e-06, "loss": 0.9171, "step": 2771 }, { "epoch": 0.09791203987831638, "grad_norm": 1.73349928855896, "learning_rate": 9.879617497989944e-06, "loss": 0.9193, "step": 2772 }, { "epoch": 0.0979473616820243, "grad_norm": 1.7135498523712158, "learning_rate": 9.879492703212584e-06, "loss": 0.8819, "step": 2773 }, { "epoch": 0.0979826834857322, "grad_norm": 1.6655021905899048, "learning_rate": 9.879367844573342e-06, "loss": 0.9085, "step": 2774 }, { "epoch": 0.0980180052894401, "grad_norm": 1.917586326599121, "learning_rate": 9.879242922073851e-06, "loss": 0.9203, "step": 2775 }, { "epoch": 0.09805332709314801, "grad_norm": 1.7824699878692627, "learning_rate": 9.879117935715745e-06, "loss": 0.8832, "step": 2776 }, { "epoch": 0.09808864889685592, "grad_norm": 1.8155184984207153, "learning_rate": 9.878992885500662e-06, "loss": 0.8832, "step": 2777 }, { "epoch": 0.09812397070056382, "grad_norm": 1.8186827898025513, "learning_rate": 9.878867771430236e-06, "loss": 0.9261, "step": 2778 }, { "epoch": 0.09815929250427173, "grad_norm": 2.1702945232391357, "learning_rate": 9.878742593506105e-06, "loss": 0.8996, "step": 2779 }, { "epoch": 0.09819461430797964, "grad_norm": 1.8670058250427246, "learning_rate": 9.87861735172991e-06, "loss": 0.9031, "step": 2780 }, { "epoch": 0.09822993611168754, "grad_norm": 1.6849372386932373, "learning_rate": 9.878492046103287e-06, "loss": 0.9011, "step": 2781 }, { "epoch": 0.09826525791539545, "grad_norm": 1.7493786811828613, "learning_rate": 9.878366676627878e-06, "loss": 0.8696, "step": 2782 }, { "epoch": 0.09830057971910336, "grad_norm": 1.7152249813079834, "learning_rate": 9.878241243305322e-06, "loss": 0.907, "step": 2783 }, { "epoch": 0.09833590152281126, "grad_norm": 1.7858277559280396, "learning_rate": 9.878115746137263e-06, "loss": 0.8773, "step": 2784 }, { "epoch": 0.09837122332651917, "grad_norm": 1.891282558441162, "learning_rate": 9.877990185125341e-06, "loss": 0.9555, "step": 2785 }, { "epoch": 0.09840654513022708, "grad_norm": 1.8309504985809326, "learning_rate": 9.8778645602712e-06, "loss": 0.9486, "step": 2786 }, { "epoch": 0.09844186693393499, "grad_norm": 2.02653431892395, "learning_rate": 9.877738871576485e-06, "loss": 0.8977, "step": 2787 }, { "epoch": 0.09847718873764288, "grad_norm": 1.6639677286148071, "learning_rate": 9.877613119042841e-06, "loss": 0.8504, "step": 2788 }, { "epoch": 0.0985125105413508, "grad_norm": 2.1257452964782715, "learning_rate": 9.877487302671914e-06, "loss": 0.9488, "step": 2789 }, { "epoch": 0.0985478323450587, "grad_norm": 1.689614176750183, "learning_rate": 9.877361422465348e-06, "loss": 0.8665, "step": 2790 }, { "epoch": 0.0985831541487666, "grad_norm": 2.312730073928833, "learning_rate": 9.877235478424793e-06, "loss": 0.9046, "step": 2791 }, { "epoch": 0.09861847595247451, "grad_norm": 2.216675281524658, "learning_rate": 9.877109470551898e-06, "loss": 0.9038, "step": 2792 }, { "epoch": 0.09865379775618242, "grad_norm": 1.6476846933364868, "learning_rate": 9.876983398848312e-06, "loss": 0.9098, "step": 2793 }, { "epoch": 0.09868911955989032, "grad_norm": 1.7753809690475464, "learning_rate": 9.876857263315681e-06, "loss": 0.8925, "step": 2794 }, { "epoch": 0.09872444136359823, "grad_norm": 2.23654842376709, "learning_rate": 9.876731063955661e-06, "loss": 0.9094, "step": 2795 }, { "epoch": 0.09875976316730614, "grad_norm": 1.9977787733078003, "learning_rate": 9.8766048007699e-06, "loss": 0.9327, "step": 2796 }, { "epoch": 0.09879508497101404, "grad_norm": 1.7465773820877075, "learning_rate": 9.876478473760053e-06, "loss": 0.891, "step": 2797 }, { "epoch": 0.09883040677472195, "grad_norm": 3.07717227935791, "learning_rate": 9.876352082927774e-06, "loss": 0.9422, "step": 2798 }, { "epoch": 0.09886572857842986, "grad_norm": 2.1326777935028076, "learning_rate": 9.876225628274714e-06, "loss": 0.9248, "step": 2799 }, { "epoch": 0.09890105038213777, "grad_norm": 1.8837475776672363, "learning_rate": 9.87609910980253e-06, "loss": 0.8752, "step": 2800 }, { "epoch": 0.09893637218584567, "grad_norm": 1.847211480140686, "learning_rate": 9.875972527512877e-06, "loss": 0.9111, "step": 2801 }, { "epoch": 0.09897169398955358, "grad_norm": 1.8741835355758667, "learning_rate": 9.875845881407414e-06, "loss": 0.9047, "step": 2802 }, { "epoch": 0.09900701579326149, "grad_norm": 2.025557518005371, "learning_rate": 9.875719171487795e-06, "loss": 0.9044, "step": 2803 }, { "epoch": 0.09904233759696938, "grad_norm": 1.9626376628875732, "learning_rate": 9.875592397755678e-06, "loss": 0.8878, "step": 2804 }, { "epoch": 0.0990776594006773, "grad_norm": 1.7118446826934814, "learning_rate": 9.875465560212727e-06, "loss": 0.8752, "step": 2805 }, { "epoch": 0.0991129812043852, "grad_norm": 2.020103693008423, "learning_rate": 9.875338658860598e-06, "loss": 0.9222, "step": 2806 }, { "epoch": 0.0991483030080931, "grad_norm": 1.8105756044387817, "learning_rate": 9.875211693700954e-06, "loss": 0.8946, "step": 2807 }, { "epoch": 0.09918362481180101, "grad_norm": 1.9960811138153076, "learning_rate": 9.875084664735454e-06, "loss": 0.9249, "step": 2808 }, { "epoch": 0.09921894661550892, "grad_norm": 1.7560070753097534, "learning_rate": 9.874957571965766e-06, "loss": 0.8498, "step": 2809 }, { "epoch": 0.09925426841921682, "grad_norm": 1.8872977495193481, "learning_rate": 9.874830415393546e-06, "loss": 0.8873, "step": 2810 }, { "epoch": 0.09928959022292473, "grad_norm": 1.7425618171691895, "learning_rate": 9.874703195020465e-06, "loss": 0.9018, "step": 2811 }, { "epoch": 0.09932491202663264, "grad_norm": 2.05726957321167, "learning_rate": 9.874575910848183e-06, "loss": 0.9239, "step": 2812 }, { "epoch": 0.09936023383034055, "grad_norm": 1.8079220056533813, "learning_rate": 9.874448562878367e-06, "loss": 0.9205, "step": 2813 }, { "epoch": 0.09939555563404845, "grad_norm": 1.7189606428146362, "learning_rate": 9.874321151112686e-06, "loss": 0.913, "step": 2814 }, { "epoch": 0.09943087743775636, "grad_norm": 1.8739484548568726, "learning_rate": 9.874193675552806e-06, "loss": 0.9111, "step": 2815 }, { "epoch": 0.09946619924146427, "grad_norm": 1.5664173364639282, "learning_rate": 9.874066136200394e-06, "loss": 0.8767, "step": 2816 }, { "epoch": 0.09950152104517217, "grad_norm": 1.7467950582504272, "learning_rate": 9.873938533057122e-06, "loss": 0.8744, "step": 2817 }, { "epoch": 0.09953684284888008, "grad_norm": 1.869242548942566, "learning_rate": 9.873810866124657e-06, "loss": 0.8978, "step": 2818 }, { "epoch": 0.09957216465258799, "grad_norm": 1.9214168787002563, "learning_rate": 9.873683135404675e-06, "loss": 0.9082, "step": 2819 }, { "epoch": 0.09960748645629589, "grad_norm": 1.6657267808914185, "learning_rate": 9.873555340898842e-06, "loss": 0.8879, "step": 2820 }, { "epoch": 0.0996428082600038, "grad_norm": 1.6680140495300293, "learning_rate": 9.873427482608833e-06, "loss": 0.917, "step": 2821 }, { "epoch": 0.0996781300637117, "grad_norm": 1.8481522798538208, "learning_rate": 9.873299560536321e-06, "loss": 0.8832, "step": 2822 }, { "epoch": 0.0997134518674196, "grad_norm": 1.7736424207687378, "learning_rate": 9.87317157468298e-06, "loss": 0.8931, "step": 2823 }, { "epoch": 0.09974877367112751, "grad_norm": 2.0809309482574463, "learning_rate": 9.873043525050487e-06, "loss": 0.8885, "step": 2824 }, { "epoch": 0.09978409547483542, "grad_norm": 1.7846444845199585, "learning_rate": 9.872915411640515e-06, "loss": 0.906, "step": 2825 }, { "epoch": 0.09981941727854333, "grad_norm": 1.8263795375823975, "learning_rate": 9.872787234454743e-06, "loss": 0.9154, "step": 2826 }, { "epoch": 0.09985473908225123, "grad_norm": 1.830932378768921, "learning_rate": 9.872658993494847e-06, "loss": 0.9287, "step": 2827 }, { "epoch": 0.09989006088595914, "grad_norm": 1.7637126445770264, "learning_rate": 9.872530688762506e-06, "loss": 0.9096, "step": 2828 }, { "epoch": 0.09992538268966705, "grad_norm": 1.867845892906189, "learning_rate": 9.8724023202594e-06, "loss": 0.8964, "step": 2829 }, { "epoch": 0.09996070449337495, "grad_norm": 2.5874972343444824, "learning_rate": 9.872273887987207e-06, "loss": 0.9273, "step": 2830 }, { "epoch": 0.09999602629708286, "grad_norm": 1.8175172805786133, "learning_rate": 9.872145391947611e-06, "loss": 0.8953, "step": 2831 }, { "epoch": 0.10003134810079077, "grad_norm": 1.7306735515594482, "learning_rate": 9.872016832142291e-06, "loss": 0.8706, "step": 2832 }, { "epoch": 0.10006666990449867, "grad_norm": 1.7805497646331787, "learning_rate": 9.87188820857293e-06, "loss": 0.8802, "step": 2833 }, { "epoch": 0.10010199170820658, "grad_norm": 1.7431896924972534, "learning_rate": 9.871759521241213e-06, "loss": 0.935, "step": 2834 }, { "epoch": 0.10013731351191449, "grad_norm": 1.870233416557312, "learning_rate": 9.871630770148823e-06, "loss": 0.8733, "step": 2835 }, { "epoch": 0.10017263531562239, "grad_norm": 2.180697441101074, "learning_rate": 9.871501955297444e-06, "loss": 0.9252, "step": 2836 }, { "epoch": 0.1002079571193303, "grad_norm": 2.0527002811431885, "learning_rate": 9.871373076688765e-06, "loss": 0.9052, "step": 2837 }, { "epoch": 0.1002432789230382, "grad_norm": 2.0422723293304443, "learning_rate": 9.87124413432447e-06, "loss": 0.8996, "step": 2838 }, { "epoch": 0.10027860072674612, "grad_norm": 1.9628543853759766, "learning_rate": 9.871115128206248e-06, "loss": 0.888, "step": 2839 }, { "epoch": 0.10031392253045401, "grad_norm": 1.6218994855880737, "learning_rate": 9.870986058335787e-06, "loss": 0.8634, "step": 2840 }, { "epoch": 0.10034924433416192, "grad_norm": 1.7619167566299438, "learning_rate": 9.870856924714776e-06, "loss": 0.8652, "step": 2841 }, { "epoch": 0.10038456613786984, "grad_norm": 2.0025157928466797, "learning_rate": 9.870727727344904e-06, "loss": 0.907, "step": 2842 }, { "epoch": 0.10041988794157773, "grad_norm": 1.7305641174316406, "learning_rate": 9.870598466227864e-06, "loss": 0.8638, "step": 2843 }, { "epoch": 0.10045520974528564, "grad_norm": 1.7430858612060547, "learning_rate": 9.870469141365347e-06, "loss": 0.8979, "step": 2844 }, { "epoch": 0.10049053154899355, "grad_norm": 1.6730849742889404, "learning_rate": 9.870339752759044e-06, "loss": 0.8653, "step": 2845 }, { "epoch": 0.10052585335270145, "grad_norm": 1.9711377620697021, "learning_rate": 9.870210300410651e-06, "loss": 0.9166, "step": 2846 }, { "epoch": 0.10056117515640936, "grad_norm": 1.8796977996826172, "learning_rate": 9.870080784321861e-06, "loss": 0.8628, "step": 2847 }, { "epoch": 0.10059649696011727, "grad_norm": 1.8327049016952515, "learning_rate": 9.86995120449437e-06, "loss": 0.884, "step": 2848 }, { "epoch": 0.10063181876382518, "grad_norm": 1.868687629699707, "learning_rate": 9.86982156092987e-06, "loss": 0.9119, "step": 2849 }, { "epoch": 0.10066714056753308, "grad_norm": 1.9108452796936035, "learning_rate": 9.869691853630065e-06, "loss": 0.9087, "step": 2850 }, { "epoch": 0.10070246237124099, "grad_norm": 1.7645589113235474, "learning_rate": 9.869562082596646e-06, "loss": 0.8855, "step": 2851 }, { "epoch": 0.1007377841749489, "grad_norm": 1.8312045335769653, "learning_rate": 9.869432247831315e-06, "loss": 0.9277, "step": 2852 }, { "epoch": 0.1007731059786568, "grad_norm": 1.7524757385253906, "learning_rate": 9.86930234933577e-06, "loss": 0.9161, "step": 2853 }, { "epoch": 0.1008084277823647, "grad_norm": 1.6694291830062866, "learning_rate": 9.86917238711171e-06, "loss": 0.8957, "step": 2854 }, { "epoch": 0.10084374958607262, "grad_norm": 1.981123924255371, "learning_rate": 9.869042361160837e-06, "loss": 0.8926, "step": 2855 }, { "epoch": 0.10087907138978051, "grad_norm": 1.69804048538208, "learning_rate": 9.868912271484853e-06, "loss": 0.8676, "step": 2856 }, { "epoch": 0.10091439319348842, "grad_norm": 4.641025066375732, "learning_rate": 9.868782118085463e-06, "loss": 0.856, "step": 2857 }, { "epoch": 0.10094971499719634, "grad_norm": 1.825746774673462, "learning_rate": 9.868651900964365e-06, "loss": 0.894, "step": 2858 }, { "epoch": 0.10098503680090423, "grad_norm": 1.9888681173324585, "learning_rate": 9.868521620123268e-06, "loss": 0.9235, "step": 2859 }, { "epoch": 0.10102035860461214, "grad_norm": 1.799434781074524, "learning_rate": 9.868391275563874e-06, "loss": 0.9198, "step": 2860 }, { "epoch": 0.10105568040832005, "grad_norm": 1.7646571397781372, "learning_rate": 9.86826086728789e-06, "loss": 0.8745, "step": 2861 }, { "epoch": 0.10109100221202796, "grad_norm": 1.8592971563339233, "learning_rate": 9.868130395297024e-06, "loss": 0.9123, "step": 2862 }, { "epoch": 0.10112632401573586, "grad_norm": 1.8697805404663086, "learning_rate": 9.86799985959298e-06, "loss": 0.9051, "step": 2863 }, { "epoch": 0.10116164581944377, "grad_norm": 1.6584460735321045, "learning_rate": 9.867869260177472e-06, "loss": 0.8991, "step": 2864 }, { "epoch": 0.10119696762315168, "grad_norm": 1.6948022842407227, "learning_rate": 9.867738597052206e-06, "loss": 0.8828, "step": 2865 }, { "epoch": 0.10123228942685958, "grad_norm": 1.6994527578353882, "learning_rate": 9.86760787021889e-06, "loss": 0.8967, "step": 2866 }, { "epoch": 0.10126761123056749, "grad_norm": 1.6102436780929565, "learning_rate": 9.867477079679238e-06, "loss": 0.9024, "step": 2867 }, { "epoch": 0.1013029330342754, "grad_norm": 1.7901432514190674, "learning_rate": 9.867346225434961e-06, "loss": 0.8983, "step": 2868 }, { "epoch": 0.1013382548379833, "grad_norm": 1.7154459953308105, "learning_rate": 9.867215307487773e-06, "loss": 0.8815, "step": 2869 }, { "epoch": 0.10137357664169121, "grad_norm": 1.7526313066482544, "learning_rate": 9.867084325839384e-06, "loss": 0.8932, "step": 2870 }, { "epoch": 0.10140889844539912, "grad_norm": 1.7750192880630493, "learning_rate": 9.866953280491512e-06, "loss": 0.9085, "step": 2871 }, { "epoch": 0.10144422024910701, "grad_norm": 1.9191038608551025, "learning_rate": 9.866822171445869e-06, "loss": 0.8762, "step": 2872 }, { "epoch": 0.10147954205281493, "grad_norm": 1.6683661937713623, "learning_rate": 9.866690998704174e-06, "loss": 0.8589, "step": 2873 }, { "epoch": 0.10151486385652284, "grad_norm": 1.9661575555801392, "learning_rate": 9.86655976226814e-06, "loss": 0.9249, "step": 2874 }, { "epoch": 0.10155018566023075, "grad_norm": 1.86459481716156, "learning_rate": 9.866428462139488e-06, "loss": 0.8928, "step": 2875 }, { "epoch": 0.10158550746393864, "grad_norm": 1.7586671113967896, "learning_rate": 9.866297098319935e-06, "loss": 0.8986, "step": 2876 }, { "epoch": 0.10162082926764655, "grad_norm": 1.7893351316452026, "learning_rate": 9.866165670811198e-06, "loss": 0.891, "step": 2877 }, { "epoch": 0.10165615107135446, "grad_norm": 2.051481246948242, "learning_rate": 9.866034179615003e-06, "loss": 0.8704, "step": 2878 }, { "epoch": 0.10169147287506236, "grad_norm": 1.8655146360397339, "learning_rate": 9.865902624733066e-06, "loss": 0.9334, "step": 2879 }, { "epoch": 0.10172679467877027, "grad_norm": 1.7584069967269897, "learning_rate": 9.865771006167108e-06, "loss": 0.8996, "step": 2880 }, { "epoch": 0.10176211648247818, "grad_norm": 1.2190709114074707, "learning_rate": 9.865639323918856e-06, "loss": 0.6114, "step": 2881 }, { "epoch": 0.10179743828618608, "grad_norm": 1.7932180166244507, "learning_rate": 9.865507577990033e-06, "loss": 0.8707, "step": 2882 }, { "epoch": 0.10183276008989399, "grad_norm": 1.939286231994629, "learning_rate": 9.865375768382357e-06, "loss": 0.9147, "step": 2883 }, { "epoch": 0.1018680818936019, "grad_norm": 1.7543997764587402, "learning_rate": 9.86524389509756e-06, "loss": 0.9074, "step": 2884 }, { "epoch": 0.1019034036973098, "grad_norm": 1.7033034563064575, "learning_rate": 9.865111958137366e-06, "loss": 0.8775, "step": 2885 }, { "epoch": 0.10193872550101771, "grad_norm": 2.0627598762512207, "learning_rate": 9.8649799575035e-06, "loss": 0.9016, "step": 2886 }, { "epoch": 0.10197404730472562, "grad_norm": 1.8318852186203003, "learning_rate": 9.86484789319769e-06, "loss": 0.8933, "step": 2887 }, { "epoch": 0.10200936910843353, "grad_norm": 1.7263920307159424, "learning_rate": 9.864715765221669e-06, "loss": 0.8815, "step": 2888 }, { "epoch": 0.10204469091214143, "grad_norm": 1.6944894790649414, "learning_rate": 9.864583573577158e-06, "loss": 0.8797, "step": 2889 }, { "epoch": 0.10208001271584934, "grad_norm": 1.7423561811447144, "learning_rate": 9.864451318265895e-06, "loss": 0.9427, "step": 2890 }, { "epoch": 0.10211533451955725, "grad_norm": 1.7468254566192627, "learning_rate": 9.864318999289605e-06, "loss": 0.84, "step": 2891 }, { "epoch": 0.10215065632326514, "grad_norm": 1.8363888263702393, "learning_rate": 9.864186616650024e-06, "loss": 0.8872, "step": 2892 }, { "epoch": 0.10218597812697305, "grad_norm": 1.8878742456436157, "learning_rate": 9.864054170348883e-06, "loss": 0.8594, "step": 2893 }, { "epoch": 0.10222129993068096, "grad_norm": 1.8641831874847412, "learning_rate": 9.863921660387916e-06, "loss": 0.9045, "step": 2894 }, { "epoch": 0.10225662173438886, "grad_norm": 1.9943901300430298, "learning_rate": 9.863789086768854e-06, "loss": 0.916, "step": 2895 }, { "epoch": 0.10229194353809677, "grad_norm": 1.8112695217132568, "learning_rate": 9.863656449493437e-06, "loss": 0.8818, "step": 2896 }, { "epoch": 0.10232726534180468, "grad_norm": 1.3291151523590088, "learning_rate": 9.863523748563398e-06, "loss": 0.5985, "step": 2897 }, { "epoch": 0.10236258714551258, "grad_norm": 2.0249269008636475, "learning_rate": 9.863390983980474e-06, "loss": 0.8908, "step": 2898 }, { "epoch": 0.10239790894922049, "grad_norm": 1.9266525506973267, "learning_rate": 9.863258155746404e-06, "loss": 0.9325, "step": 2899 }, { "epoch": 0.1024332307529284, "grad_norm": 4.290766716003418, "learning_rate": 9.863125263862925e-06, "loss": 0.9303, "step": 2900 }, { "epoch": 0.10246855255663631, "grad_norm": 1.7035582065582275, "learning_rate": 9.862992308331775e-06, "loss": 0.9306, "step": 2901 }, { "epoch": 0.10250387436034421, "grad_norm": 3.007530927658081, "learning_rate": 9.862859289154698e-06, "loss": 0.9265, "step": 2902 }, { "epoch": 0.10253919616405212, "grad_norm": 1.760431170463562, "learning_rate": 9.862726206333432e-06, "loss": 0.8564, "step": 2903 }, { "epoch": 0.10257451796776003, "grad_norm": 2.038188934326172, "learning_rate": 9.862593059869719e-06, "loss": 0.9334, "step": 2904 }, { "epoch": 0.10260983977146793, "grad_norm": 2.301166296005249, "learning_rate": 9.862459849765303e-06, "loss": 0.916, "step": 2905 }, { "epoch": 0.10264516157517584, "grad_norm": 1.8922926187515259, "learning_rate": 9.862326576021927e-06, "loss": 0.9386, "step": 2906 }, { "epoch": 0.10268048337888375, "grad_norm": 1.8263695240020752, "learning_rate": 9.862193238641334e-06, "loss": 0.8827, "step": 2907 }, { "epoch": 0.10271580518259164, "grad_norm": 1.9784040451049805, "learning_rate": 9.86205983762527e-06, "loss": 0.8572, "step": 2908 }, { "epoch": 0.10275112698629955, "grad_norm": 1.6747280359268188, "learning_rate": 9.86192637297548e-06, "loss": 0.9293, "step": 2909 }, { "epoch": 0.10278644879000746, "grad_norm": 1.8395721912384033, "learning_rate": 9.861792844693712e-06, "loss": 0.9194, "step": 2910 }, { "epoch": 0.10282177059371536, "grad_norm": 1.8221473693847656, "learning_rate": 9.861659252781713e-06, "loss": 0.8888, "step": 2911 }, { "epoch": 0.10285709239742327, "grad_norm": 1.7191630601882935, "learning_rate": 9.861525597241232e-06, "loss": 0.9013, "step": 2912 }, { "epoch": 0.10289241420113118, "grad_norm": 1.841676115989685, "learning_rate": 9.861391878074019e-06, "loss": 0.9258, "step": 2913 }, { "epoch": 0.1029277360048391, "grad_norm": 1.7419352531433105, "learning_rate": 9.861258095281821e-06, "loss": 0.9128, "step": 2914 }, { "epoch": 0.10296305780854699, "grad_norm": 1.6937589645385742, "learning_rate": 9.861124248866391e-06, "loss": 0.9428, "step": 2915 }, { "epoch": 0.1029983796122549, "grad_norm": 1.799342393875122, "learning_rate": 9.860990338829483e-06, "loss": 0.8518, "step": 2916 }, { "epoch": 0.10303370141596281, "grad_norm": 1.8076280355453491, "learning_rate": 9.860856365172845e-06, "loss": 0.8616, "step": 2917 }, { "epoch": 0.10306902321967071, "grad_norm": 1.6842751502990723, "learning_rate": 9.860722327898233e-06, "loss": 0.8606, "step": 2918 }, { "epoch": 0.10310434502337862, "grad_norm": 1.63871169090271, "learning_rate": 9.860588227007402e-06, "loss": 0.8913, "step": 2919 }, { "epoch": 0.10313966682708653, "grad_norm": 1.73612642288208, "learning_rate": 9.860454062502104e-06, "loss": 0.8879, "step": 2920 }, { "epoch": 0.10317498863079443, "grad_norm": 1.8319814205169678, "learning_rate": 9.860319834384097e-06, "loss": 0.9108, "step": 2921 }, { "epoch": 0.10321031043450234, "grad_norm": 1.6965395212173462, "learning_rate": 9.860185542655139e-06, "loss": 0.8904, "step": 2922 }, { "epoch": 0.10324563223821025, "grad_norm": 1.8571290969848633, "learning_rate": 9.860051187316986e-06, "loss": 0.8645, "step": 2923 }, { "epoch": 0.10328095404191814, "grad_norm": 1.7427130937576294, "learning_rate": 9.859916768371395e-06, "loss": 0.8886, "step": 2924 }, { "epoch": 0.10331627584562605, "grad_norm": 1.7413558959960938, "learning_rate": 9.85978228582013e-06, "loss": 0.8883, "step": 2925 }, { "epoch": 0.10335159764933396, "grad_norm": 1.899975299835205, "learning_rate": 9.859647739664946e-06, "loss": 0.8774, "step": 2926 }, { "epoch": 0.10338691945304188, "grad_norm": 1.7677550315856934, "learning_rate": 9.859513129907607e-06, "loss": 0.8624, "step": 2927 }, { "epoch": 0.10342224125674977, "grad_norm": 1.7294577360153198, "learning_rate": 9.859378456549873e-06, "loss": 0.9001, "step": 2928 }, { "epoch": 0.10345756306045768, "grad_norm": 1.7374378442764282, "learning_rate": 9.859243719593506e-06, "loss": 0.8467, "step": 2929 }, { "epoch": 0.1034928848641656, "grad_norm": 1.7262057065963745, "learning_rate": 9.859108919040272e-06, "loss": 0.8989, "step": 2930 }, { "epoch": 0.10352820666787349, "grad_norm": 2.1865668296813965, "learning_rate": 9.858974054891933e-06, "loss": 0.9174, "step": 2931 }, { "epoch": 0.1035635284715814, "grad_norm": 1.9737451076507568, "learning_rate": 9.858839127150257e-06, "loss": 0.9107, "step": 2932 }, { "epoch": 0.10359885027528931, "grad_norm": 1.718224048614502, "learning_rate": 9.858704135817005e-06, "loss": 0.8796, "step": 2933 }, { "epoch": 0.10363417207899721, "grad_norm": 1.8292597532272339, "learning_rate": 9.858569080893948e-06, "loss": 0.8873, "step": 2934 }, { "epoch": 0.10366949388270512, "grad_norm": 1.8690778017044067, "learning_rate": 9.858433962382853e-06, "loss": 0.8756, "step": 2935 }, { "epoch": 0.10370481568641303, "grad_norm": 1.8897958993911743, "learning_rate": 9.858298780285487e-06, "loss": 0.8976, "step": 2936 }, { "epoch": 0.10374013749012093, "grad_norm": 1.623833417892456, "learning_rate": 9.85816353460362e-06, "loss": 0.8516, "step": 2937 }, { "epoch": 0.10377545929382884, "grad_norm": 1.861137866973877, "learning_rate": 9.85802822533902e-06, "loss": 0.9135, "step": 2938 }, { "epoch": 0.10381078109753675, "grad_norm": 1.8170589208602905, "learning_rate": 9.857892852493463e-06, "loss": 0.859, "step": 2939 }, { "epoch": 0.10384610290124466, "grad_norm": 2.13596248626709, "learning_rate": 9.857757416068714e-06, "loss": 0.9183, "step": 2940 }, { "epoch": 0.10388142470495255, "grad_norm": 1.8313031196594238, "learning_rate": 9.85762191606655e-06, "loss": 0.8745, "step": 2941 }, { "epoch": 0.10391674650866047, "grad_norm": 1.767484188079834, "learning_rate": 9.857486352488746e-06, "loss": 0.8827, "step": 2942 }, { "epoch": 0.10395206831236838, "grad_norm": 1.8935275077819824, "learning_rate": 9.857350725337072e-06, "loss": 0.9027, "step": 2943 }, { "epoch": 0.10398739011607627, "grad_norm": 1.828615665435791, "learning_rate": 9.857215034613306e-06, "loss": 0.9011, "step": 2944 }, { "epoch": 0.10402271191978418, "grad_norm": 1.7775311470031738, "learning_rate": 9.85707928031922e-06, "loss": 0.9112, "step": 2945 }, { "epoch": 0.1040580337234921, "grad_norm": 1.7027571201324463, "learning_rate": 9.856943462456596e-06, "loss": 0.9147, "step": 2946 }, { "epoch": 0.10409335552719999, "grad_norm": 1.859455943107605, "learning_rate": 9.856807581027208e-06, "loss": 0.9398, "step": 2947 }, { "epoch": 0.1041286773309079, "grad_norm": 1.6764026880264282, "learning_rate": 9.856671636032834e-06, "loss": 0.8613, "step": 2948 }, { "epoch": 0.10416399913461581, "grad_norm": 1.7899667024612427, "learning_rate": 9.856535627475257e-06, "loss": 0.8696, "step": 2949 }, { "epoch": 0.10419932093832372, "grad_norm": 1.8172358274459839, "learning_rate": 9.856399555356252e-06, "loss": 0.8692, "step": 2950 }, { "epoch": 0.10423464274203162, "grad_norm": 1.6903249025344849, "learning_rate": 9.856263419677605e-06, "loss": 0.9135, "step": 2951 }, { "epoch": 0.10426996454573953, "grad_norm": 1.853700041770935, "learning_rate": 9.856127220441095e-06, "loss": 0.9003, "step": 2952 }, { "epoch": 0.10430528634944744, "grad_norm": 1.7623802423477173, "learning_rate": 9.855990957648504e-06, "loss": 0.8947, "step": 2953 }, { "epoch": 0.10434060815315534, "grad_norm": 1.6586660146713257, "learning_rate": 9.855854631301615e-06, "loss": 0.8911, "step": 2954 }, { "epoch": 0.10437592995686325, "grad_norm": 1.738593578338623, "learning_rate": 9.855718241402217e-06, "loss": 0.9005, "step": 2955 }, { "epoch": 0.10441125176057116, "grad_norm": 1.7417957782745361, "learning_rate": 9.855581787952088e-06, "loss": 0.8947, "step": 2956 }, { "epoch": 0.10444657356427905, "grad_norm": 1.7234091758728027, "learning_rate": 9.85544527095302e-06, "loss": 0.8882, "step": 2957 }, { "epoch": 0.10448189536798697, "grad_norm": 1.7993156909942627, "learning_rate": 9.855308690406795e-06, "loss": 0.9135, "step": 2958 }, { "epoch": 0.10451721717169488, "grad_norm": 1.8519909381866455, "learning_rate": 9.855172046315203e-06, "loss": 0.8981, "step": 2959 }, { "epoch": 0.10455253897540277, "grad_norm": 1.3039950132369995, "learning_rate": 9.85503533868003e-06, "loss": 0.5795, "step": 2960 }, { "epoch": 0.10458786077911068, "grad_norm": 1.8836239576339722, "learning_rate": 9.854898567503071e-06, "loss": 0.9169, "step": 2961 }, { "epoch": 0.1046231825828186, "grad_norm": 1.7472504377365112, "learning_rate": 9.854761732786108e-06, "loss": 0.903, "step": 2962 }, { "epoch": 0.1046585043865265, "grad_norm": 2.001898765563965, "learning_rate": 9.854624834530939e-06, "loss": 0.8799, "step": 2963 }, { "epoch": 0.1046938261902344, "grad_norm": 1.8812642097473145, "learning_rate": 9.854487872739353e-06, "loss": 0.9002, "step": 2964 }, { "epoch": 0.10472914799394231, "grad_norm": 1.9005869626998901, "learning_rate": 9.854350847413141e-06, "loss": 0.9022, "step": 2965 }, { "epoch": 0.10476446979765022, "grad_norm": 2.2232112884521484, "learning_rate": 9.854213758554098e-06, "loss": 0.9112, "step": 2966 }, { "epoch": 0.10479979160135812, "grad_norm": 2.2879562377929688, "learning_rate": 9.854076606164018e-06, "loss": 0.8742, "step": 2967 }, { "epoch": 0.10483511340506603, "grad_norm": 1.974349021911621, "learning_rate": 9.853939390244695e-06, "loss": 0.8972, "step": 2968 }, { "epoch": 0.10487043520877394, "grad_norm": 1.8546195030212402, "learning_rate": 9.853802110797926e-06, "loss": 0.8858, "step": 2969 }, { "epoch": 0.10490575701248184, "grad_norm": 1.9236938953399658, "learning_rate": 9.853664767825509e-06, "loss": 0.8885, "step": 2970 }, { "epoch": 0.10494107881618975, "grad_norm": 1.6934905052185059, "learning_rate": 9.853527361329238e-06, "loss": 0.8653, "step": 2971 }, { "epoch": 0.10497640061989766, "grad_norm": 1.8552969694137573, "learning_rate": 9.853389891310913e-06, "loss": 0.9005, "step": 2972 }, { "epoch": 0.10501172242360556, "grad_norm": 1.5997188091278076, "learning_rate": 9.853252357772336e-06, "loss": 0.8794, "step": 2973 }, { "epoch": 0.10504704422731347, "grad_norm": 1.8184527158737183, "learning_rate": 9.853114760715301e-06, "loss": 0.9236, "step": 2974 }, { "epoch": 0.10508236603102138, "grad_norm": 15.876862525939941, "learning_rate": 9.852977100141614e-06, "loss": 0.8998, "step": 2975 }, { "epoch": 0.10511768783472929, "grad_norm": 1.8160064220428467, "learning_rate": 9.852839376053076e-06, "loss": 0.8972, "step": 2976 }, { "epoch": 0.10515300963843718, "grad_norm": 1.6788798570632935, "learning_rate": 9.852701588451487e-06, "loss": 0.8801, "step": 2977 }, { "epoch": 0.1051883314421451, "grad_norm": 1.8763359785079956, "learning_rate": 9.852563737338652e-06, "loss": 0.8706, "step": 2978 }, { "epoch": 0.105223653245853, "grad_norm": 1.6678143739700317, "learning_rate": 9.852425822716375e-06, "loss": 0.8795, "step": 2979 }, { "epoch": 0.1052589750495609, "grad_norm": 1.7462520599365234, "learning_rate": 9.852287844586462e-06, "loss": 0.8884, "step": 2980 }, { "epoch": 0.10529429685326881, "grad_norm": 1.6661744117736816, "learning_rate": 9.852149802950718e-06, "loss": 0.8654, "step": 2981 }, { "epoch": 0.10532961865697672, "grad_norm": 1.9447908401489258, "learning_rate": 9.852011697810948e-06, "loss": 0.9105, "step": 2982 }, { "epoch": 0.10536494046068462, "grad_norm": 1.8305033445358276, "learning_rate": 9.85187352916896e-06, "loss": 0.8849, "step": 2983 }, { "epoch": 0.10540026226439253, "grad_norm": 1.8367177248001099, "learning_rate": 9.851735297026565e-06, "loss": 0.8832, "step": 2984 }, { "epoch": 0.10543558406810044, "grad_norm": 1.9122209548950195, "learning_rate": 9.85159700138557e-06, "loss": 0.8912, "step": 2985 }, { "epoch": 0.10547090587180834, "grad_norm": 1.7871947288513184, "learning_rate": 9.851458642247786e-06, "loss": 0.8496, "step": 2986 }, { "epoch": 0.10550622767551625, "grad_norm": 4.282363414764404, "learning_rate": 9.851320219615024e-06, "loss": 0.8997, "step": 2987 }, { "epoch": 0.10554154947922416, "grad_norm": 1.215658187866211, "learning_rate": 9.851181733489093e-06, "loss": 0.5922, "step": 2988 }, { "epoch": 0.10557687128293207, "grad_norm": 1.948424220085144, "learning_rate": 9.851043183871809e-06, "loss": 0.911, "step": 2989 }, { "epoch": 0.10561219308663997, "grad_norm": 1.8123903274536133, "learning_rate": 9.850904570764983e-06, "loss": 0.8671, "step": 2990 }, { "epoch": 0.10564751489034788, "grad_norm": 1.9108715057373047, "learning_rate": 9.85076589417043e-06, "loss": 0.8973, "step": 2991 }, { "epoch": 0.10568283669405579, "grad_norm": 1.828304409980774, "learning_rate": 9.850627154089964e-06, "loss": 0.8811, "step": 2992 }, { "epoch": 0.10571815849776368, "grad_norm": 1.922129511833191, "learning_rate": 9.850488350525402e-06, "loss": 0.9384, "step": 2993 }, { "epoch": 0.1057534803014716, "grad_norm": 1.7366480827331543, "learning_rate": 9.850349483478561e-06, "loss": 0.87, "step": 2994 }, { "epoch": 0.1057888021051795, "grad_norm": 1.850814700126648, "learning_rate": 9.850210552951258e-06, "loss": 0.9205, "step": 2995 }, { "epoch": 0.1058241239088874, "grad_norm": 1.8973723649978638, "learning_rate": 9.85007155894531e-06, "loss": 0.9123, "step": 2996 }, { "epoch": 0.10585944571259531, "grad_norm": 1.8530715703964233, "learning_rate": 9.849932501462539e-06, "loss": 0.8803, "step": 2997 }, { "epoch": 0.10589476751630322, "grad_norm": 1.7136986255645752, "learning_rate": 9.84979338050476e-06, "loss": 0.9103, "step": 2998 }, { "epoch": 0.10593008932001112, "grad_norm": 1.690701961517334, "learning_rate": 9.8496541960738e-06, "loss": 0.883, "step": 2999 }, { "epoch": 0.10596541112371903, "grad_norm": 1.35061514377594, "learning_rate": 9.849514948171474e-06, "loss": 0.63, "step": 3000 }, { "epoch": 0.10600073292742694, "grad_norm": 1.7022324800491333, "learning_rate": 9.849375636799611e-06, "loss": 0.8798, "step": 3001 }, { "epoch": 0.10603605473113485, "grad_norm": 2.0692505836486816, "learning_rate": 9.84923626196003e-06, "loss": 0.886, "step": 3002 }, { "epoch": 0.10607137653484275, "grad_norm": 1.872016191482544, "learning_rate": 9.849096823654556e-06, "loss": 0.9413, "step": 3003 }, { "epoch": 0.10610669833855066, "grad_norm": 1.7531661987304688, "learning_rate": 9.848957321885016e-06, "loss": 0.9101, "step": 3004 }, { "epoch": 0.10614202014225857, "grad_norm": 1.799721121788025, "learning_rate": 9.848817756653234e-06, "loss": 0.9177, "step": 3005 }, { "epoch": 0.10617734194596647, "grad_norm": 2.027294158935547, "learning_rate": 9.848678127961034e-06, "loss": 0.8745, "step": 3006 }, { "epoch": 0.10621266374967438, "grad_norm": 2.045689821243286, "learning_rate": 9.848538435810248e-06, "loss": 0.8953, "step": 3007 }, { "epoch": 0.10624798555338229, "grad_norm": 1.6426414251327515, "learning_rate": 9.848398680202702e-06, "loss": 0.8753, "step": 3008 }, { "epoch": 0.10628330735709018, "grad_norm": 1.7635403871536255, "learning_rate": 9.848258861140225e-06, "loss": 0.862, "step": 3009 }, { "epoch": 0.1063186291607981, "grad_norm": 2.050795078277588, "learning_rate": 9.84811897862465e-06, "loss": 0.9156, "step": 3010 }, { "epoch": 0.106353950964506, "grad_norm": 1.8229014873504639, "learning_rate": 9.847979032657801e-06, "loss": 0.8895, "step": 3011 }, { "epoch": 0.1063892727682139, "grad_norm": 1.121222734451294, "learning_rate": 9.847839023241516e-06, "loss": 0.6039, "step": 3012 }, { "epoch": 0.10642459457192181, "grad_norm": 1.8333712816238403, "learning_rate": 9.847698950377626e-06, "loss": 0.9006, "step": 3013 }, { "epoch": 0.10645991637562972, "grad_norm": 1.7099064588546753, "learning_rate": 9.847558814067962e-06, "loss": 0.8955, "step": 3014 }, { "epoch": 0.10649523817933763, "grad_norm": 1.5894668102264404, "learning_rate": 9.847418614314361e-06, "loss": 0.8826, "step": 3015 }, { "epoch": 0.10653055998304553, "grad_norm": 1.6961718797683716, "learning_rate": 9.847278351118655e-06, "loss": 0.9125, "step": 3016 }, { "epoch": 0.10656588178675344, "grad_norm": 1.7769033908843994, "learning_rate": 9.847138024482681e-06, "loss": 0.8843, "step": 3017 }, { "epoch": 0.10660120359046135, "grad_norm": 1.758398413658142, "learning_rate": 9.846997634408276e-06, "loss": 0.8663, "step": 3018 }, { "epoch": 0.10663652539416925, "grad_norm": 1.7007368803024292, "learning_rate": 9.846857180897279e-06, "loss": 0.9102, "step": 3019 }, { "epoch": 0.10667184719787716, "grad_norm": 1.7985422611236572, "learning_rate": 9.846716663951523e-06, "loss": 0.8836, "step": 3020 }, { "epoch": 0.10670716900158507, "grad_norm": 1.6580053567886353, "learning_rate": 9.846576083572852e-06, "loss": 0.8838, "step": 3021 }, { "epoch": 0.10674249080529297, "grad_norm": 1.0904654264450073, "learning_rate": 9.846435439763104e-06, "loss": 0.6168, "step": 3022 }, { "epoch": 0.10677781260900088, "grad_norm": 1.7768876552581787, "learning_rate": 9.84629473252412e-06, "loss": 0.8829, "step": 3023 }, { "epoch": 0.10681313441270879, "grad_norm": 1.6980946063995361, "learning_rate": 9.846153961857742e-06, "loss": 0.8955, "step": 3024 }, { "epoch": 0.10684845621641668, "grad_norm": 1.9540170431137085, "learning_rate": 9.846013127765812e-06, "loss": 0.8922, "step": 3025 }, { "epoch": 0.1068837780201246, "grad_norm": 1.79323410987854, "learning_rate": 9.845872230250172e-06, "loss": 0.9401, "step": 3026 }, { "epoch": 0.1069190998238325, "grad_norm": 1.8934943675994873, "learning_rate": 9.845731269312668e-06, "loss": 0.8938, "step": 3027 }, { "epoch": 0.10695442162754042, "grad_norm": 1.9096044301986694, "learning_rate": 9.845590244955146e-06, "loss": 0.8418, "step": 3028 }, { "epoch": 0.10698974343124831, "grad_norm": 1.8293895721435547, "learning_rate": 9.845449157179448e-06, "loss": 0.9292, "step": 3029 }, { "epoch": 0.10702506523495622, "grad_norm": 1.6027822494506836, "learning_rate": 9.84530800598742e-06, "loss": 0.8838, "step": 3030 }, { "epoch": 0.10706038703866413, "grad_norm": 1.8467695713043213, "learning_rate": 9.845166791380915e-06, "loss": 0.8964, "step": 3031 }, { "epoch": 0.10709570884237203, "grad_norm": 1.8011518716812134, "learning_rate": 9.845025513361777e-06, "loss": 0.9187, "step": 3032 }, { "epoch": 0.10713103064607994, "grad_norm": 1.7092432975769043, "learning_rate": 9.844884171931855e-06, "loss": 0.8911, "step": 3033 }, { "epoch": 0.10716635244978785, "grad_norm": 1.657213568687439, "learning_rate": 9.844742767092999e-06, "loss": 0.8796, "step": 3034 }, { "epoch": 0.10720167425349575, "grad_norm": 1.6218655109405518, "learning_rate": 9.844601298847063e-06, "loss": 0.8707, "step": 3035 }, { "epoch": 0.10723699605720366, "grad_norm": 1.73682701587677, "learning_rate": 9.844459767195892e-06, "loss": 0.8904, "step": 3036 }, { "epoch": 0.10727231786091157, "grad_norm": 1.818545937538147, "learning_rate": 9.844318172141346e-06, "loss": 0.897, "step": 3037 }, { "epoch": 0.10730763966461947, "grad_norm": 1.7106835842132568, "learning_rate": 9.844176513685273e-06, "loss": 0.8674, "step": 3038 }, { "epoch": 0.10734296146832738, "grad_norm": 2.3734965324401855, "learning_rate": 9.844034791829529e-06, "loss": 0.8909, "step": 3039 }, { "epoch": 0.10737828327203529, "grad_norm": 1.7722200155258179, "learning_rate": 9.843893006575966e-06, "loss": 0.8924, "step": 3040 }, { "epoch": 0.1074136050757432, "grad_norm": 1.962790846824646, "learning_rate": 9.843751157926445e-06, "loss": 0.8828, "step": 3041 }, { "epoch": 0.1074489268794511, "grad_norm": 1.7127364873886108, "learning_rate": 9.843609245882817e-06, "loss": 0.8872, "step": 3042 }, { "epoch": 0.107484248683159, "grad_norm": 1.7700780630111694, "learning_rate": 9.843467270446942e-06, "loss": 0.9107, "step": 3043 }, { "epoch": 0.10751957048686692, "grad_norm": 1.820108413696289, "learning_rate": 9.84332523162068e-06, "loss": 0.8699, "step": 3044 }, { "epoch": 0.10755489229057481, "grad_norm": 1.81892991065979, "learning_rate": 9.843183129405885e-06, "loss": 0.931, "step": 3045 }, { "epoch": 0.10759021409428272, "grad_norm": 1.7354896068572998, "learning_rate": 9.843040963804421e-06, "loss": 0.8981, "step": 3046 }, { "epoch": 0.10762553589799063, "grad_norm": 1.805801272392273, "learning_rate": 9.842898734818148e-06, "loss": 0.9157, "step": 3047 }, { "epoch": 0.10766085770169853, "grad_norm": 1.8304507732391357, "learning_rate": 9.842756442448925e-06, "loss": 0.9081, "step": 3048 }, { "epoch": 0.10769617950540644, "grad_norm": 1.7353755235671997, "learning_rate": 9.842614086698616e-06, "loss": 0.8851, "step": 3049 }, { "epoch": 0.10773150130911435, "grad_norm": 1.6625844240188599, "learning_rate": 9.842471667569085e-06, "loss": 0.8994, "step": 3050 }, { "epoch": 0.10776682311282226, "grad_norm": 1.6740591526031494, "learning_rate": 9.842329185062196e-06, "loss": 0.8779, "step": 3051 }, { "epoch": 0.10780214491653016, "grad_norm": 1.8339885473251343, "learning_rate": 9.842186639179812e-06, "loss": 0.8737, "step": 3052 }, { "epoch": 0.10783746672023807, "grad_norm": 2.212663173675537, "learning_rate": 9.842044029923799e-06, "loss": 0.8587, "step": 3053 }, { "epoch": 0.10787278852394598, "grad_norm": 1.7194339036941528, "learning_rate": 9.841901357296025e-06, "loss": 0.8602, "step": 3054 }, { "epoch": 0.10790811032765388, "grad_norm": 1.7574347257614136, "learning_rate": 9.841758621298355e-06, "loss": 0.9102, "step": 3055 }, { "epoch": 0.10794343213136179, "grad_norm": 1.6793649196624756, "learning_rate": 9.841615821932659e-06, "loss": 0.8985, "step": 3056 }, { "epoch": 0.1079787539350697, "grad_norm": 1.7678226232528687, "learning_rate": 9.841472959200805e-06, "loss": 0.8858, "step": 3057 }, { "epoch": 0.1080140757387776, "grad_norm": 1.8552995920181274, "learning_rate": 9.841330033104664e-06, "loss": 0.9392, "step": 3058 }, { "epoch": 0.1080493975424855, "grad_norm": 1.7777587175369263, "learning_rate": 9.841187043646104e-06, "loss": 0.9122, "step": 3059 }, { "epoch": 0.10808471934619342, "grad_norm": 1.7342880964279175, "learning_rate": 9.841043990826999e-06, "loss": 0.861, "step": 3060 }, { "epoch": 0.10812004114990131, "grad_norm": 1.7086091041564941, "learning_rate": 9.840900874649219e-06, "loss": 0.8854, "step": 3061 }, { "epoch": 0.10815536295360922, "grad_norm": 1.6865137815475464, "learning_rate": 9.84075769511464e-06, "loss": 0.9115, "step": 3062 }, { "epoch": 0.10819068475731713, "grad_norm": 1.9188069105148315, "learning_rate": 9.840614452225134e-06, "loss": 0.9372, "step": 3063 }, { "epoch": 0.10822600656102505, "grad_norm": 1.6947654485702515, "learning_rate": 9.840471145982575e-06, "loss": 0.899, "step": 3064 }, { "epoch": 0.10826132836473294, "grad_norm": 1.8483505249023438, "learning_rate": 9.840327776388841e-06, "loss": 0.9091, "step": 3065 }, { "epoch": 0.10829665016844085, "grad_norm": 1.6205016374588013, "learning_rate": 9.840184343445806e-06, "loss": 0.9054, "step": 3066 }, { "epoch": 0.10833197197214876, "grad_norm": 1.6593681573867798, "learning_rate": 9.840040847155347e-06, "loss": 0.8777, "step": 3067 }, { "epoch": 0.10836729377585666, "grad_norm": 1.5619029998779297, "learning_rate": 9.839897287519346e-06, "loss": 0.8839, "step": 3068 }, { "epoch": 0.10840261557956457, "grad_norm": 1.7595305442810059, "learning_rate": 9.839753664539677e-06, "loss": 0.8683, "step": 3069 }, { "epoch": 0.10843793738327248, "grad_norm": 1.785003423690796, "learning_rate": 9.839609978218223e-06, "loss": 0.9118, "step": 3070 }, { "epoch": 0.10847325918698038, "grad_norm": 1.7172002792358398, "learning_rate": 9.839466228556862e-06, "loss": 0.883, "step": 3071 }, { "epoch": 0.10850858099068829, "grad_norm": 1.7931526899337769, "learning_rate": 9.839322415557477e-06, "loss": 0.8838, "step": 3072 }, { "epoch": 0.1085439027943962, "grad_norm": 1.8829660415649414, "learning_rate": 9.839178539221952e-06, "loss": 0.8613, "step": 3073 }, { "epoch": 0.1085792245981041, "grad_norm": 2.1016318798065186, "learning_rate": 9.839034599552166e-06, "loss": 0.8902, "step": 3074 }, { "epoch": 0.108614546401812, "grad_norm": 1.7583651542663574, "learning_rate": 9.838890596550005e-06, "loss": 0.8995, "step": 3075 }, { "epoch": 0.10864986820551992, "grad_norm": 1.693314790725708, "learning_rate": 9.838746530217353e-06, "loss": 0.9134, "step": 3076 }, { "epoch": 0.10868519000922783, "grad_norm": 1.7751905918121338, "learning_rate": 9.838602400556097e-06, "loss": 0.8724, "step": 3077 }, { "epoch": 0.10872051181293572, "grad_norm": 1.8722261190414429, "learning_rate": 9.838458207568122e-06, "loss": 0.9146, "step": 3078 }, { "epoch": 0.10875583361664364, "grad_norm": 1.8171842098236084, "learning_rate": 9.838313951255316e-06, "loss": 0.8995, "step": 3079 }, { "epoch": 0.10879115542035155, "grad_norm": 1.9709447622299194, "learning_rate": 9.838169631619565e-06, "loss": 0.8781, "step": 3080 }, { "epoch": 0.10882647722405944, "grad_norm": 2.075838327407837, "learning_rate": 9.83802524866276e-06, "loss": 0.8841, "step": 3081 }, { "epoch": 0.10886179902776735, "grad_norm": 1.6412012577056885, "learning_rate": 9.83788080238679e-06, "loss": 0.8784, "step": 3082 }, { "epoch": 0.10889712083147526, "grad_norm": 1.800523281097412, "learning_rate": 9.837736292793548e-06, "loss": 0.8832, "step": 3083 }, { "epoch": 0.10893244263518316, "grad_norm": 1.7369327545166016, "learning_rate": 9.83759171988492e-06, "loss": 0.9122, "step": 3084 }, { "epoch": 0.10896776443889107, "grad_norm": 2.223855495452881, "learning_rate": 9.837447083662801e-06, "loss": 0.8969, "step": 3085 }, { "epoch": 0.10900308624259898, "grad_norm": 1.7785532474517822, "learning_rate": 9.837302384129084e-06, "loss": 0.8443, "step": 3086 }, { "epoch": 0.10903840804630688, "grad_norm": 1.7508634328842163, "learning_rate": 9.837157621285663e-06, "loss": 0.8859, "step": 3087 }, { "epoch": 0.10907372985001479, "grad_norm": 1.7991026639938354, "learning_rate": 9.837012795134433e-06, "loss": 0.893, "step": 3088 }, { "epoch": 0.1091090516537227, "grad_norm": 2.015378475189209, "learning_rate": 9.836867905677289e-06, "loss": 0.8978, "step": 3089 }, { "epoch": 0.10914437345743061, "grad_norm": 2.056036949157715, "learning_rate": 9.836722952916125e-06, "loss": 0.8798, "step": 3090 }, { "epoch": 0.1091796952611385, "grad_norm": 1.104459285736084, "learning_rate": 9.836577936852843e-06, "loss": 0.6151, "step": 3091 }, { "epoch": 0.10921501706484642, "grad_norm": 2.0210373401641846, "learning_rate": 9.836432857489336e-06, "loss": 0.9072, "step": 3092 }, { "epoch": 0.10925033886855433, "grad_norm": 1.9605662822723389, "learning_rate": 9.836287714827507e-06, "loss": 0.8702, "step": 3093 }, { "epoch": 0.10928566067226222, "grad_norm": 1.7672451734542847, "learning_rate": 9.836142508869252e-06, "loss": 0.8674, "step": 3094 }, { "epoch": 0.10932098247597014, "grad_norm": 1.680959939956665, "learning_rate": 9.835997239616473e-06, "loss": 0.8613, "step": 3095 }, { "epoch": 0.10935630427967805, "grad_norm": 2.8118348121643066, "learning_rate": 9.83585190707107e-06, "loss": 0.8564, "step": 3096 }, { "epoch": 0.10939162608338594, "grad_norm": 1.9742956161499023, "learning_rate": 9.835706511234948e-06, "loss": 0.907, "step": 3097 }, { "epoch": 0.10942694788709385, "grad_norm": 2.012584924697876, "learning_rate": 9.835561052110007e-06, "loss": 0.8762, "step": 3098 }, { "epoch": 0.10946226969080176, "grad_norm": 1.8384463787078857, "learning_rate": 9.835415529698154e-06, "loss": 0.8646, "step": 3099 }, { "epoch": 0.10949759149450966, "grad_norm": 2.0254456996917725, "learning_rate": 9.835269944001288e-06, "loss": 0.8724, "step": 3100 }, { "epoch": 0.10953291329821757, "grad_norm": 1.8506190776824951, "learning_rate": 9.83512429502132e-06, "loss": 0.8996, "step": 3101 }, { "epoch": 0.10956823510192548, "grad_norm": 1.6813123226165771, "learning_rate": 9.834978582760155e-06, "loss": 0.8937, "step": 3102 }, { "epoch": 0.10960355690563339, "grad_norm": 1.526676893234253, "learning_rate": 9.834832807219699e-06, "loss": 0.8437, "step": 3103 }, { "epoch": 0.10963887870934129, "grad_norm": 1.9253928661346436, "learning_rate": 9.834686968401858e-06, "loss": 0.917, "step": 3104 }, { "epoch": 0.1096742005130492, "grad_norm": 2.073922634124756, "learning_rate": 9.834541066308545e-06, "loss": 0.8687, "step": 3105 }, { "epoch": 0.10970952231675711, "grad_norm": 1.901161789894104, "learning_rate": 9.834395100941667e-06, "loss": 0.867, "step": 3106 }, { "epoch": 0.10974484412046501, "grad_norm": 1.6004823446273804, "learning_rate": 9.834249072303134e-06, "loss": 0.8675, "step": 3107 }, { "epoch": 0.10978016592417292, "grad_norm": 1.993043303489685, "learning_rate": 9.834102980394858e-06, "loss": 0.8864, "step": 3108 }, { "epoch": 0.10981548772788083, "grad_norm": 2.302004814147949, "learning_rate": 9.833956825218751e-06, "loss": 0.8507, "step": 3109 }, { "epoch": 0.10985080953158873, "grad_norm": 1.8054897785186768, "learning_rate": 9.833810606776725e-06, "loss": 0.918, "step": 3110 }, { "epoch": 0.10988613133529664, "grad_norm": 1.7120968103408813, "learning_rate": 9.833664325070698e-06, "loss": 0.8811, "step": 3111 }, { "epoch": 0.10992145313900455, "grad_norm": 1.8990905284881592, "learning_rate": 9.833517980102577e-06, "loss": 0.8824, "step": 3112 }, { "epoch": 0.10995677494271244, "grad_norm": 1.831276535987854, "learning_rate": 9.833371571874281e-06, "loss": 0.8853, "step": 3113 }, { "epoch": 0.10999209674642035, "grad_norm": 1.7601393461227417, "learning_rate": 9.833225100387729e-06, "loss": 0.9279, "step": 3114 }, { "epoch": 0.11002741855012826, "grad_norm": 1.747782588005066, "learning_rate": 9.833078565644835e-06, "loss": 0.9028, "step": 3115 }, { "epoch": 0.11006274035383617, "grad_norm": 1.6628562211990356, "learning_rate": 9.832931967647517e-06, "loss": 0.8729, "step": 3116 }, { "epoch": 0.11009806215754407, "grad_norm": 1.7128058671951294, "learning_rate": 9.832785306397693e-06, "loss": 0.9105, "step": 3117 }, { "epoch": 0.11013338396125198, "grad_norm": 1.7499806880950928, "learning_rate": 9.832638581897283e-06, "loss": 0.8874, "step": 3118 }, { "epoch": 0.11016870576495989, "grad_norm": 1.8497259616851807, "learning_rate": 9.83249179414821e-06, "loss": 0.8695, "step": 3119 }, { "epoch": 0.11020402756866779, "grad_norm": 1.7308191061019897, "learning_rate": 9.83234494315239e-06, "loss": 0.8471, "step": 3120 }, { "epoch": 0.1102393493723757, "grad_norm": 1.8364216089248657, "learning_rate": 9.83219802891175e-06, "loss": 0.8937, "step": 3121 }, { "epoch": 0.11027467117608361, "grad_norm": 1.7243114709854126, "learning_rate": 9.832051051428208e-06, "loss": 0.907, "step": 3122 }, { "epoch": 0.11030999297979151, "grad_norm": 1.8624950647354126, "learning_rate": 9.831904010703692e-06, "loss": 0.8697, "step": 3123 }, { "epoch": 0.11034531478349942, "grad_norm": 1.8716861009597778, "learning_rate": 9.831756906740124e-06, "loss": 0.8899, "step": 3124 }, { "epoch": 0.11038063658720733, "grad_norm": 1.773949146270752, "learning_rate": 9.831609739539432e-06, "loss": 0.8814, "step": 3125 }, { "epoch": 0.11041595839091523, "grad_norm": 1.65836763381958, "learning_rate": 9.831462509103537e-06, "loss": 0.8679, "step": 3126 }, { "epoch": 0.11045128019462314, "grad_norm": 1.9953334331512451, "learning_rate": 9.831315215434371e-06, "loss": 0.8558, "step": 3127 }, { "epoch": 0.11048660199833105, "grad_norm": 1.7647737264633179, "learning_rate": 9.831167858533859e-06, "loss": 0.867, "step": 3128 }, { "epoch": 0.11052192380203896, "grad_norm": 1.7831823825836182, "learning_rate": 9.831020438403929e-06, "loss": 0.8769, "step": 3129 }, { "epoch": 0.11055724560574685, "grad_norm": 1.7615320682525635, "learning_rate": 9.830872955046514e-06, "loss": 0.8611, "step": 3130 }, { "epoch": 0.11059256740945476, "grad_norm": 1.7121719121932983, "learning_rate": 9.83072540846354e-06, "loss": 0.8887, "step": 3131 }, { "epoch": 0.11062788921316268, "grad_norm": 1.7750980854034424, "learning_rate": 9.83057779865694e-06, "loss": 0.9018, "step": 3132 }, { "epoch": 0.11066321101687057, "grad_norm": 1.2192137241363525, "learning_rate": 9.830430125628647e-06, "loss": 0.6093, "step": 3133 }, { "epoch": 0.11069853282057848, "grad_norm": 1.7907803058624268, "learning_rate": 9.830282389380592e-06, "loss": 0.8891, "step": 3134 }, { "epoch": 0.11073385462428639, "grad_norm": 1.9354910850524902, "learning_rate": 9.830134589914708e-06, "loss": 0.8957, "step": 3135 }, { "epoch": 0.11076917642799429, "grad_norm": 1.8058178424835205, "learning_rate": 9.829986727232931e-06, "loss": 0.8938, "step": 3136 }, { "epoch": 0.1108044982317022, "grad_norm": 1.7345099449157715, "learning_rate": 9.829838801337198e-06, "loss": 0.9188, "step": 3137 }, { "epoch": 0.11083982003541011, "grad_norm": 1.8104480504989624, "learning_rate": 9.82969081222944e-06, "loss": 0.8714, "step": 3138 }, { "epoch": 0.11087514183911801, "grad_norm": 1.8359285593032837, "learning_rate": 9.829542759911597e-06, "loss": 0.8596, "step": 3139 }, { "epoch": 0.11091046364282592, "grad_norm": 2.434000015258789, "learning_rate": 9.829394644385607e-06, "loss": 0.8816, "step": 3140 }, { "epoch": 0.11094578544653383, "grad_norm": 1.755082368850708, "learning_rate": 9.829246465653406e-06, "loss": 0.8788, "step": 3141 }, { "epoch": 0.11098110725024174, "grad_norm": 1.808274507522583, "learning_rate": 9.829098223716938e-06, "loss": 0.8771, "step": 3142 }, { "epoch": 0.11101642905394964, "grad_norm": 1.6829338073730469, "learning_rate": 9.828949918578136e-06, "loss": 0.9088, "step": 3143 }, { "epoch": 0.11105175085765755, "grad_norm": 1.7928465604782104, "learning_rate": 9.82880155023895e-06, "loss": 0.8954, "step": 3144 }, { "epoch": 0.11108707266136546, "grad_norm": 2.094954252243042, "learning_rate": 9.828653118701314e-06, "loss": 0.8659, "step": 3145 }, { "epoch": 0.11112239446507335, "grad_norm": 1.159881353378296, "learning_rate": 9.828504623967175e-06, "loss": 0.6236, "step": 3146 }, { "epoch": 0.11115771626878126, "grad_norm": 1.888966679573059, "learning_rate": 9.828356066038473e-06, "loss": 0.8628, "step": 3147 }, { "epoch": 0.11119303807248918, "grad_norm": 1.8824901580810547, "learning_rate": 9.828207444917157e-06, "loss": 0.8995, "step": 3148 }, { "epoch": 0.11122835987619707, "grad_norm": 1.777969479560852, "learning_rate": 9.828058760605168e-06, "loss": 0.8775, "step": 3149 }, { "epoch": 0.11126368167990498, "grad_norm": 1.8414963483810425, "learning_rate": 9.827910013104454e-06, "loss": 0.8994, "step": 3150 }, { "epoch": 0.1112990034836129, "grad_norm": 1.88438880443573, "learning_rate": 9.82776120241696e-06, "loss": 0.8861, "step": 3151 }, { "epoch": 0.1113343252873208, "grad_norm": 1.7763643264770508, "learning_rate": 9.827612328544638e-06, "loss": 0.8911, "step": 3152 }, { "epoch": 0.1113696470910287, "grad_norm": 2.037884473800659, "learning_rate": 9.827463391489432e-06, "loss": 0.9131, "step": 3153 }, { "epoch": 0.11140496889473661, "grad_norm": 1.9565736055374146, "learning_rate": 9.82731439125329e-06, "loss": 0.9202, "step": 3154 }, { "epoch": 0.11144029069844452, "grad_norm": 1.862903356552124, "learning_rate": 9.827165327838169e-06, "loss": 0.8952, "step": 3155 }, { "epoch": 0.11147561250215242, "grad_norm": 1.0774239301681519, "learning_rate": 9.827016201246014e-06, "loss": 0.5871, "step": 3156 }, { "epoch": 0.11151093430586033, "grad_norm": 1.758156657218933, "learning_rate": 9.826867011478776e-06, "loss": 0.8918, "step": 3157 }, { "epoch": 0.11154625610956824, "grad_norm": 1.8477017879486084, "learning_rate": 9.826717758538413e-06, "loss": 0.868, "step": 3158 }, { "epoch": 0.11158157791327614, "grad_norm": 1.9641220569610596, "learning_rate": 9.826568442426873e-06, "loss": 0.9214, "step": 3159 }, { "epoch": 0.11161689971698405, "grad_norm": 1.7468351125717163, "learning_rate": 9.826419063146113e-06, "loss": 0.9062, "step": 3160 }, { "epoch": 0.11165222152069196, "grad_norm": 1.8917664289474487, "learning_rate": 9.826269620698088e-06, "loss": 0.8933, "step": 3161 }, { "epoch": 0.11168754332439985, "grad_norm": 1.859663963317871, "learning_rate": 9.826120115084754e-06, "loss": 0.8829, "step": 3162 }, { "epoch": 0.11172286512810777, "grad_norm": 2.012969732284546, "learning_rate": 9.825970546308067e-06, "loss": 0.8963, "step": 3163 }, { "epoch": 0.11175818693181568, "grad_norm": 1.807573914527893, "learning_rate": 9.825820914369983e-06, "loss": 0.8942, "step": 3164 }, { "epoch": 0.11179350873552359, "grad_norm": 1.9893642663955688, "learning_rate": 9.825671219272465e-06, "loss": 0.9333, "step": 3165 }, { "epoch": 0.11182883053923148, "grad_norm": 1.813463807106018, "learning_rate": 9.825521461017467e-06, "loss": 0.8915, "step": 3166 }, { "epoch": 0.1118641523429394, "grad_norm": 1.663996934890747, "learning_rate": 9.825371639606952e-06, "loss": 0.8681, "step": 3167 }, { "epoch": 0.1118994741466473, "grad_norm": 1.778528094291687, "learning_rate": 9.82522175504288e-06, "loss": 0.8795, "step": 3168 }, { "epoch": 0.1119347959503552, "grad_norm": 1.8352845907211304, "learning_rate": 9.825071807327211e-06, "loss": 0.8949, "step": 3169 }, { "epoch": 0.11197011775406311, "grad_norm": 1.70255708694458, "learning_rate": 9.824921796461913e-06, "loss": 0.8929, "step": 3170 }, { "epoch": 0.11200543955777102, "grad_norm": 1.74273681640625, "learning_rate": 9.824771722448943e-06, "loss": 0.9126, "step": 3171 }, { "epoch": 0.11204076136147892, "grad_norm": 1.6700208187103271, "learning_rate": 9.824621585290268e-06, "loss": 0.8492, "step": 3172 }, { "epoch": 0.11207608316518683, "grad_norm": 1.8229138851165771, "learning_rate": 9.824471384987851e-06, "loss": 0.9108, "step": 3173 }, { "epoch": 0.11211140496889474, "grad_norm": 1.8224371671676636, "learning_rate": 9.824321121543662e-06, "loss": 0.8837, "step": 3174 }, { "epoch": 0.11214672677260264, "grad_norm": 1.889609456062317, "learning_rate": 9.824170794959665e-06, "loss": 0.8833, "step": 3175 }, { "epoch": 0.11218204857631055, "grad_norm": 1.868157148361206, "learning_rate": 9.824020405237825e-06, "loss": 0.8731, "step": 3176 }, { "epoch": 0.11221737038001846, "grad_norm": 1.7482951879501343, "learning_rate": 9.823869952380113e-06, "loss": 0.8757, "step": 3177 }, { "epoch": 0.11225269218372637, "grad_norm": 1.9191021919250488, "learning_rate": 9.8237194363885e-06, "loss": 0.8863, "step": 3178 }, { "epoch": 0.11228801398743427, "grad_norm": 1.774522304534912, "learning_rate": 9.823568857264954e-06, "loss": 0.8791, "step": 3179 }, { "epoch": 0.11232333579114218, "grad_norm": 1.7916390895843506, "learning_rate": 9.823418215011444e-06, "loss": 0.8613, "step": 3180 }, { "epoch": 0.11235865759485009, "grad_norm": 1.8951947689056396, "learning_rate": 9.823267509629944e-06, "loss": 0.894, "step": 3181 }, { "epoch": 0.11239397939855798, "grad_norm": 1.7094887495040894, "learning_rate": 9.823116741122424e-06, "loss": 0.905, "step": 3182 }, { "epoch": 0.1124293012022659, "grad_norm": 1.8005726337432861, "learning_rate": 9.82296590949086e-06, "loss": 0.8651, "step": 3183 }, { "epoch": 0.1124646230059738, "grad_norm": 2.102633476257324, "learning_rate": 9.822815014737224e-06, "loss": 0.8883, "step": 3184 }, { "epoch": 0.1124999448096817, "grad_norm": 1.6876765489578247, "learning_rate": 9.822664056863492e-06, "loss": 0.8787, "step": 3185 }, { "epoch": 0.11253526661338961, "grad_norm": 1.7562000751495361, "learning_rate": 9.82251303587164e-06, "loss": 0.8936, "step": 3186 }, { "epoch": 0.11257058841709752, "grad_norm": 1.619240641593933, "learning_rate": 9.822361951763644e-06, "loss": 0.8649, "step": 3187 }, { "epoch": 0.11260591022080542, "grad_norm": 1.6523407697677612, "learning_rate": 9.822210804541481e-06, "loss": 0.8503, "step": 3188 }, { "epoch": 0.11264123202451333, "grad_norm": 1.966201663017273, "learning_rate": 9.822059594207128e-06, "loss": 0.8994, "step": 3189 }, { "epoch": 0.11267655382822124, "grad_norm": 1.8158930540084839, "learning_rate": 9.821908320762568e-06, "loss": 0.8928, "step": 3190 }, { "epoch": 0.11271187563192915, "grad_norm": 1.8727335929870605, "learning_rate": 9.821756984209777e-06, "loss": 0.9524, "step": 3191 }, { "epoch": 0.11274719743563705, "grad_norm": 4.260619640350342, "learning_rate": 9.821605584550737e-06, "loss": 0.8783, "step": 3192 }, { "epoch": 0.11278251923934496, "grad_norm": 1.6612255573272705, "learning_rate": 9.821454121787429e-06, "loss": 0.9004, "step": 3193 }, { "epoch": 0.11281784104305287, "grad_norm": 1.806523084640503, "learning_rate": 9.821302595921835e-06, "loss": 0.9454, "step": 3194 }, { "epoch": 0.11285316284676077, "grad_norm": 2.1230602264404297, "learning_rate": 9.82115100695594e-06, "loss": 0.9016, "step": 3195 }, { "epoch": 0.11288848465046868, "grad_norm": 1.881370186805725, "learning_rate": 9.820999354891728e-06, "loss": 0.9283, "step": 3196 }, { "epoch": 0.11292380645417659, "grad_norm": 1.9161828756332397, "learning_rate": 9.820847639731182e-06, "loss": 0.8864, "step": 3197 }, { "epoch": 0.11295912825788448, "grad_norm": 1.6656081676483154, "learning_rate": 9.820695861476287e-06, "loss": 0.8548, "step": 3198 }, { "epoch": 0.1129944500615924, "grad_norm": 1.8484952449798584, "learning_rate": 9.820544020129032e-06, "loss": 0.9056, "step": 3199 }, { "epoch": 0.1130297718653003, "grad_norm": 1.9693495035171509, "learning_rate": 9.820392115691404e-06, "loss": 0.8902, "step": 3200 }, { "epoch": 0.1130650936690082, "grad_norm": 1.8147234916687012, "learning_rate": 9.820240148165389e-06, "loss": 0.9144, "step": 3201 }, { "epoch": 0.11310041547271611, "grad_norm": 1.900713324546814, "learning_rate": 9.820088117552977e-06, "loss": 0.8656, "step": 3202 }, { "epoch": 0.11313573727642402, "grad_norm": 1.730904221534729, "learning_rate": 9.819936023856159e-06, "loss": 0.8692, "step": 3203 }, { "epoch": 0.11317105908013193, "grad_norm": 2.490792989730835, "learning_rate": 9.819783867076922e-06, "loss": 0.9147, "step": 3204 }, { "epoch": 0.11320638088383983, "grad_norm": 5.245167255401611, "learning_rate": 9.819631647217262e-06, "loss": 0.8977, "step": 3205 }, { "epoch": 0.11324170268754774, "grad_norm": 1.8883377313613892, "learning_rate": 9.819479364279168e-06, "loss": 0.9038, "step": 3206 }, { "epoch": 0.11327702449125565, "grad_norm": 1.7517077922821045, "learning_rate": 9.819327018264632e-06, "loss": 0.8413, "step": 3207 }, { "epoch": 0.11331234629496355, "grad_norm": 1.807979941368103, "learning_rate": 9.819174609175653e-06, "loss": 0.9152, "step": 3208 }, { "epoch": 0.11334766809867146, "grad_norm": 1.848181962966919, "learning_rate": 9.819022137014222e-06, "loss": 0.8829, "step": 3209 }, { "epoch": 0.11338298990237937, "grad_norm": 1.8317945003509521, "learning_rate": 9.818869601782335e-06, "loss": 0.9181, "step": 3210 }, { "epoch": 0.11341831170608727, "grad_norm": 1.7357850074768066, "learning_rate": 9.818717003481988e-06, "loss": 0.8886, "step": 3211 }, { "epoch": 0.11345363350979518, "grad_norm": 1.827142596244812, "learning_rate": 9.818564342115179e-06, "loss": 0.9118, "step": 3212 }, { "epoch": 0.11348895531350309, "grad_norm": 2.002180814743042, "learning_rate": 9.818411617683905e-06, "loss": 0.8858, "step": 3213 }, { "epoch": 0.11352427711721098, "grad_norm": 1.8666690587997437, "learning_rate": 9.818258830190165e-06, "loss": 0.8786, "step": 3214 }, { "epoch": 0.1135595989209189, "grad_norm": 1.6586662530899048, "learning_rate": 9.81810597963596e-06, "loss": 0.8787, "step": 3215 }, { "epoch": 0.1135949207246268, "grad_norm": 1.8913613557815552, "learning_rate": 9.81795306602329e-06, "loss": 0.9025, "step": 3216 }, { "epoch": 0.11363024252833472, "grad_norm": 1.8234248161315918, "learning_rate": 9.817800089354157e-06, "loss": 0.886, "step": 3217 }, { "epoch": 0.11366556433204261, "grad_norm": 1.6995517015457153, "learning_rate": 9.817647049630562e-06, "loss": 0.8243, "step": 3218 }, { "epoch": 0.11370088613575052, "grad_norm": 1.7601721286773682, "learning_rate": 9.817493946854506e-06, "loss": 0.8808, "step": 3219 }, { "epoch": 0.11373620793945843, "grad_norm": 1.8436883687973022, "learning_rate": 9.817340781027996e-06, "loss": 0.8941, "step": 3220 }, { "epoch": 0.11377152974316633, "grad_norm": 2.1855480670928955, "learning_rate": 9.817187552153036e-06, "loss": 0.8781, "step": 3221 }, { "epoch": 0.11380685154687424, "grad_norm": 1.9719269275665283, "learning_rate": 9.81703426023163e-06, "loss": 0.9062, "step": 3222 }, { "epoch": 0.11384217335058215, "grad_norm": 1.694737195968628, "learning_rate": 9.816880905265784e-06, "loss": 0.8787, "step": 3223 }, { "epoch": 0.11387749515429005, "grad_norm": 1.7360345125198364, "learning_rate": 9.816727487257508e-06, "loss": 0.8918, "step": 3224 }, { "epoch": 0.11391281695799796, "grad_norm": 2.0109899044036865, "learning_rate": 9.816574006208808e-06, "loss": 0.9055, "step": 3225 }, { "epoch": 0.11394813876170587, "grad_norm": 2.0627808570861816, "learning_rate": 9.816420462121692e-06, "loss": 0.9101, "step": 3226 }, { "epoch": 0.11398346056541377, "grad_norm": 1.8395861387252808, "learning_rate": 9.81626685499817e-06, "loss": 0.8868, "step": 3227 }, { "epoch": 0.11401878236912168, "grad_norm": 1.8893381357192993, "learning_rate": 9.816113184840253e-06, "loss": 0.8802, "step": 3228 }, { "epoch": 0.11405410417282959, "grad_norm": 1.8798104524612427, "learning_rate": 9.815959451649953e-06, "loss": 0.9239, "step": 3229 }, { "epoch": 0.1140894259765375, "grad_norm": 1.869521975517273, "learning_rate": 9.81580565542928e-06, "loss": 0.9051, "step": 3230 }, { "epoch": 0.1141247477802454, "grad_norm": 1.8258639574050903, "learning_rate": 9.815651796180248e-06, "loss": 0.8757, "step": 3231 }, { "epoch": 0.1141600695839533, "grad_norm": 1.6669466495513916, "learning_rate": 9.81549787390487e-06, "loss": 0.8743, "step": 3232 }, { "epoch": 0.11419539138766122, "grad_norm": 1.7427998781204224, "learning_rate": 9.815343888605163e-06, "loss": 0.8658, "step": 3233 }, { "epoch": 0.11423071319136911, "grad_norm": 1.7516165971755981, "learning_rate": 9.815189840283139e-06, "loss": 0.8693, "step": 3234 }, { "epoch": 0.11426603499507702, "grad_norm": 1.8599374294281006, "learning_rate": 9.815035728940816e-06, "loss": 0.8784, "step": 3235 }, { "epoch": 0.11430135679878493, "grad_norm": 1.7958239316940308, "learning_rate": 9.81488155458021e-06, "loss": 0.9165, "step": 3236 }, { "epoch": 0.11433667860249283, "grad_norm": 1.6259901523590088, "learning_rate": 9.81472731720334e-06, "loss": 0.8749, "step": 3237 }, { "epoch": 0.11437200040620074, "grad_norm": 1.8254632949829102, "learning_rate": 9.814573016812224e-06, "loss": 0.845, "step": 3238 }, { "epoch": 0.11440732220990865, "grad_norm": 1.7367419004440308, "learning_rate": 9.81441865340888e-06, "loss": 0.8619, "step": 3239 }, { "epoch": 0.11444264401361655, "grad_norm": 1.637856364250183, "learning_rate": 9.814264226995331e-06, "loss": 0.8767, "step": 3240 }, { "epoch": 0.11447796581732446, "grad_norm": 1.8767465353012085, "learning_rate": 9.814109737573596e-06, "loss": 0.8767, "step": 3241 }, { "epoch": 0.11451328762103237, "grad_norm": 1.8691456317901611, "learning_rate": 9.813955185145698e-06, "loss": 0.8953, "step": 3242 }, { "epoch": 0.11454860942474028, "grad_norm": 1.7621111869812012, "learning_rate": 9.81380056971366e-06, "loss": 0.8361, "step": 3243 }, { "epoch": 0.11458393122844818, "grad_norm": 1.7217881679534912, "learning_rate": 9.813645891279504e-06, "loss": 0.8906, "step": 3244 }, { "epoch": 0.11461925303215609, "grad_norm": 1.698038101196289, "learning_rate": 9.813491149845255e-06, "loss": 0.9008, "step": 3245 }, { "epoch": 0.114654574835864, "grad_norm": 1.712714433670044, "learning_rate": 9.813336345412941e-06, "loss": 0.8993, "step": 3246 }, { "epoch": 0.1146898966395719, "grad_norm": 1.940372109413147, "learning_rate": 9.813181477984584e-06, "loss": 0.8815, "step": 3247 }, { "epoch": 0.1147252184432798, "grad_norm": 1.8876230716705322, "learning_rate": 9.813026547562213e-06, "loss": 0.9093, "step": 3248 }, { "epoch": 0.11476054024698772, "grad_norm": 1.746267318725586, "learning_rate": 9.812871554147854e-06, "loss": 0.9004, "step": 3249 }, { "epoch": 0.11479586205069561, "grad_norm": 1.7353044748306274, "learning_rate": 9.81271649774354e-06, "loss": 0.8559, "step": 3250 }, { "epoch": 0.11483118385440352, "grad_norm": 1.7917050123214722, "learning_rate": 9.812561378351294e-06, "loss": 0.901, "step": 3251 }, { "epoch": 0.11486650565811143, "grad_norm": 1.8044755458831787, "learning_rate": 9.81240619597315e-06, "loss": 0.8965, "step": 3252 }, { "epoch": 0.11490182746181934, "grad_norm": 1.7643721103668213, "learning_rate": 9.81225095061114e-06, "loss": 0.8931, "step": 3253 }, { "epoch": 0.11493714926552724, "grad_norm": 1.851926565170288, "learning_rate": 9.812095642267293e-06, "loss": 0.9071, "step": 3254 }, { "epoch": 0.11497247106923515, "grad_norm": 1.7901097536087036, "learning_rate": 9.811940270943644e-06, "loss": 0.906, "step": 3255 }, { "epoch": 0.11500779287294306, "grad_norm": 1.730612874031067, "learning_rate": 9.811784836642222e-06, "loss": 0.871, "step": 3256 }, { "epoch": 0.11504311467665096, "grad_norm": 1.840775489807129, "learning_rate": 9.811629339365067e-06, "loss": 0.8701, "step": 3257 }, { "epoch": 0.11507843648035887, "grad_norm": 1.8244459629058838, "learning_rate": 9.811473779114212e-06, "loss": 0.8838, "step": 3258 }, { "epoch": 0.11511375828406678, "grad_norm": 1.78105628490448, "learning_rate": 9.811318155891692e-06, "loss": 0.9006, "step": 3259 }, { "epoch": 0.11514908008777468, "grad_norm": 1.8184733390808105, "learning_rate": 9.811162469699544e-06, "loss": 0.9022, "step": 3260 }, { "epoch": 0.11518440189148259, "grad_norm": 1.6963739395141602, "learning_rate": 9.811006720539806e-06, "loss": 0.8673, "step": 3261 }, { "epoch": 0.1152197236951905, "grad_norm": 1.9232043027877808, "learning_rate": 9.810850908414517e-06, "loss": 0.8755, "step": 3262 }, { "epoch": 0.1152550454988984, "grad_norm": 1.8144714832305908, "learning_rate": 9.810695033325717e-06, "loss": 0.8976, "step": 3263 }, { "epoch": 0.1152903673026063, "grad_norm": 1.7493374347686768, "learning_rate": 9.810539095275442e-06, "loss": 0.8788, "step": 3264 }, { "epoch": 0.11532568910631422, "grad_norm": 1.8289721012115479, "learning_rate": 9.810383094265738e-06, "loss": 0.8719, "step": 3265 }, { "epoch": 0.11536101091002213, "grad_norm": 1.9371998310089111, "learning_rate": 9.810227030298643e-06, "loss": 0.8865, "step": 3266 }, { "epoch": 0.11539633271373002, "grad_norm": 1.7947453260421753, "learning_rate": 9.8100709033762e-06, "loss": 0.8673, "step": 3267 }, { "epoch": 0.11543165451743793, "grad_norm": 1.7521955966949463, "learning_rate": 9.809914713500455e-06, "loss": 0.9143, "step": 3268 }, { "epoch": 0.11546697632114584, "grad_norm": 1.7321579456329346, "learning_rate": 9.80975846067345e-06, "loss": 0.9225, "step": 3269 }, { "epoch": 0.11550229812485374, "grad_norm": 1.625271201133728, "learning_rate": 9.809602144897231e-06, "loss": 0.8464, "step": 3270 }, { "epoch": 0.11553761992856165, "grad_norm": 2.0529839992523193, "learning_rate": 9.809445766173841e-06, "loss": 0.8594, "step": 3271 }, { "epoch": 0.11557294173226956, "grad_norm": 2.0058178901672363, "learning_rate": 9.80928932450533e-06, "loss": 0.8965, "step": 3272 }, { "epoch": 0.11560826353597746, "grad_norm": 2.0249736309051514, "learning_rate": 9.809132819893746e-06, "loss": 0.8761, "step": 3273 }, { "epoch": 0.11564358533968537, "grad_norm": 1.9384018182754517, "learning_rate": 9.808976252341132e-06, "loss": 0.916, "step": 3274 }, { "epoch": 0.11567890714339328, "grad_norm": 1.7863234281539917, "learning_rate": 9.808819621849543e-06, "loss": 0.8911, "step": 3275 }, { "epoch": 0.11571422894710118, "grad_norm": 1.760571837425232, "learning_rate": 9.808662928421027e-06, "loss": 0.8926, "step": 3276 }, { "epoch": 0.11574955075080909, "grad_norm": 1.8897203207015991, "learning_rate": 9.808506172057634e-06, "loss": 0.8817, "step": 3277 }, { "epoch": 0.115784872554517, "grad_norm": 1.651983380317688, "learning_rate": 9.808349352761417e-06, "loss": 0.8768, "step": 3278 }, { "epoch": 0.11582019435822491, "grad_norm": 1.9377189874649048, "learning_rate": 9.808192470534427e-06, "loss": 0.9001, "step": 3279 }, { "epoch": 0.1158555161619328, "grad_norm": 1.6455378532409668, "learning_rate": 9.808035525378719e-06, "loss": 0.8681, "step": 3280 }, { "epoch": 0.11589083796564072, "grad_norm": 1.8454052209854126, "learning_rate": 9.807878517296343e-06, "loss": 0.941, "step": 3281 }, { "epoch": 0.11592615976934863, "grad_norm": 1.7418851852416992, "learning_rate": 9.807721446289358e-06, "loss": 0.9047, "step": 3282 }, { "epoch": 0.11596148157305652, "grad_norm": 1.895588755607605, "learning_rate": 9.807564312359819e-06, "loss": 0.8907, "step": 3283 }, { "epoch": 0.11599680337676443, "grad_norm": 1.6870251893997192, "learning_rate": 9.80740711550978e-06, "loss": 0.8833, "step": 3284 }, { "epoch": 0.11603212518047235, "grad_norm": 1.909267783164978, "learning_rate": 9.807249855741304e-06, "loss": 0.875, "step": 3285 }, { "epoch": 0.11606744698418024, "grad_norm": 1.6631836891174316, "learning_rate": 9.807092533056444e-06, "loss": 0.8653, "step": 3286 }, { "epoch": 0.11610276878788815, "grad_norm": 1.8991750478744507, "learning_rate": 9.806935147457259e-06, "loss": 0.8885, "step": 3287 }, { "epoch": 0.11613809059159606, "grad_norm": 1.7843048572540283, "learning_rate": 9.806777698945812e-06, "loss": 0.8851, "step": 3288 }, { "epoch": 0.11617341239530396, "grad_norm": 1.5996793508529663, "learning_rate": 9.806620187524163e-06, "loss": 0.8302, "step": 3289 }, { "epoch": 0.11620873419901187, "grad_norm": 1.5654656887054443, "learning_rate": 9.80646261319437e-06, "loss": 0.8831, "step": 3290 }, { "epoch": 0.11624405600271978, "grad_norm": 1.7301068305969238, "learning_rate": 9.806304975958501e-06, "loss": 0.8752, "step": 3291 }, { "epoch": 0.11627937780642769, "grad_norm": 1.781589150428772, "learning_rate": 9.806147275818613e-06, "loss": 0.9102, "step": 3292 }, { "epoch": 0.11631469961013559, "grad_norm": 1.7909913063049316, "learning_rate": 9.805989512776774e-06, "loss": 0.8907, "step": 3293 }, { "epoch": 0.1163500214138435, "grad_norm": 1.704766869544983, "learning_rate": 9.805831686835048e-06, "loss": 0.889, "step": 3294 }, { "epoch": 0.11638534321755141, "grad_norm": 1.644591212272644, "learning_rate": 9.805673797995499e-06, "loss": 0.8831, "step": 3295 }, { "epoch": 0.1164206650212593, "grad_norm": 1.6666680574417114, "learning_rate": 9.805515846260196e-06, "loss": 0.8595, "step": 3296 }, { "epoch": 0.11645598682496722, "grad_norm": 1.7120493650436401, "learning_rate": 9.805357831631203e-06, "loss": 0.8951, "step": 3297 }, { "epoch": 0.11649130862867513, "grad_norm": 1.9838858842849731, "learning_rate": 9.805199754110593e-06, "loss": 0.8918, "step": 3298 }, { "epoch": 0.11652663043238302, "grad_norm": 1.696502923965454, "learning_rate": 9.80504161370043e-06, "loss": 0.9066, "step": 3299 }, { "epoch": 0.11656195223609093, "grad_norm": 1.6588507890701294, "learning_rate": 9.804883410402786e-06, "loss": 0.8773, "step": 3300 }, { "epoch": 0.11659727403979885, "grad_norm": 1.9362602233886719, "learning_rate": 9.80472514421973e-06, "loss": 0.8914, "step": 3301 }, { "epoch": 0.11663259584350674, "grad_norm": 1.8742154836654663, "learning_rate": 9.804566815153337e-06, "loss": 0.8997, "step": 3302 }, { "epoch": 0.11666791764721465, "grad_norm": 1.9442806243896484, "learning_rate": 9.804408423205676e-06, "loss": 0.9131, "step": 3303 }, { "epoch": 0.11670323945092256, "grad_norm": 1.7357810735702515, "learning_rate": 9.80424996837882e-06, "loss": 0.8817, "step": 3304 }, { "epoch": 0.11673856125463047, "grad_norm": 1.7551202774047852, "learning_rate": 9.804091450674844e-06, "loss": 0.9069, "step": 3305 }, { "epoch": 0.11677388305833837, "grad_norm": 1.7971712350845337, "learning_rate": 9.803932870095822e-06, "loss": 0.874, "step": 3306 }, { "epoch": 0.11680920486204628, "grad_norm": 1.6751823425292969, "learning_rate": 9.80377422664383e-06, "loss": 0.9004, "step": 3307 }, { "epoch": 0.11684452666575419, "grad_norm": 1.754446029663086, "learning_rate": 9.803615520320942e-06, "loss": 0.8744, "step": 3308 }, { "epoch": 0.11687984846946209, "grad_norm": 1.662906527519226, "learning_rate": 9.80345675112924e-06, "loss": 0.8701, "step": 3309 }, { "epoch": 0.11691517027317, "grad_norm": 1.9058691263198853, "learning_rate": 9.803297919070795e-06, "loss": 0.8568, "step": 3310 }, { "epoch": 0.11695049207687791, "grad_norm": 1.8482115268707275, "learning_rate": 9.803139024147692e-06, "loss": 0.88, "step": 3311 }, { "epoch": 0.1169858138805858, "grad_norm": 1.7100309133529663, "learning_rate": 9.802980066362009e-06, "loss": 0.887, "step": 3312 }, { "epoch": 0.11702113568429372, "grad_norm": 1.6950223445892334, "learning_rate": 9.802821045715826e-06, "loss": 0.8719, "step": 3313 }, { "epoch": 0.11705645748800163, "grad_norm": 1.8299767971038818, "learning_rate": 9.802661962211222e-06, "loss": 0.8886, "step": 3314 }, { "epoch": 0.11709177929170952, "grad_norm": 2.1978986263275146, "learning_rate": 9.802502815850283e-06, "loss": 0.8574, "step": 3315 }, { "epoch": 0.11712710109541744, "grad_norm": 1.7687937021255493, "learning_rate": 9.80234360663509e-06, "loss": 0.8996, "step": 3316 }, { "epoch": 0.11716242289912535, "grad_norm": 1.9409849643707275, "learning_rate": 9.802184334567725e-06, "loss": 0.8904, "step": 3317 }, { "epoch": 0.11719774470283326, "grad_norm": 1.8372774124145508, "learning_rate": 9.802024999650274e-06, "loss": 0.8873, "step": 3318 }, { "epoch": 0.11723306650654115, "grad_norm": 1.76407790184021, "learning_rate": 9.801865601884823e-06, "loss": 0.8563, "step": 3319 }, { "epoch": 0.11726838831024906, "grad_norm": 1.7899117469787598, "learning_rate": 9.801706141273458e-06, "loss": 0.8663, "step": 3320 }, { "epoch": 0.11730371011395697, "grad_norm": 1.6545687913894653, "learning_rate": 9.801546617818268e-06, "loss": 0.8753, "step": 3321 }, { "epoch": 0.11733903191766487, "grad_norm": 1.7008112668991089, "learning_rate": 9.801387031521335e-06, "loss": 0.8678, "step": 3322 }, { "epoch": 0.11737435372137278, "grad_norm": 1.7659047842025757, "learning_rate": 9.801227382384753e-06, "loss": 0.8694, "step": 3323 }, { "epoch": 0.11740967552508069, "grad_norm": 1.6539027690887451, "learning_rate": 9.80106767041061e-06, "loss": 0.8913, "step": 3324 }, { "epoch": 0.11744499732878859, "grad_norm": 1.6767624616622925, "learning_rate": 9.800907895600994e-06, "loss": 0.9032, "step": 3325 }, { "epoch": 0.1174803191324965, "grad_norm": 1.9583672285079956, "learning_rate": 9.800748057957998e-06, "loss": 0.8774, "step": 3326 }, { "epoch": 0.11751564093620441, "grad_norm": 1.720418930053711, "learning_rate": 9.800588157483714e-06, "loss": 0.8831, "step": 3327 }, { "epoch": 0.1175509627399123, "grad_norm": 1.7872668504714966, "learning_rate": 9.800428194180235e-06, "loss": 0.9104, "step": 3328 }, { "epoch": 0.11758628454362022, "grad_norm": 2.0339102745056152, "learning_rate": 9.800268168049654e-06, "loss": 0.9002, "step": 3329 }, { "epoch": 0.11762160634732813, "grad_norm": 1.8505460023880005, "learning_rate": 9.800108079094066e-06, "loss": 0.8717, "step": 3330 }, { "epoch": 0.11765692815103604, "grad_norm": 1.7224360704421997, "learning_rate": 9.799947927315565e-06, "loss": 0.8951, "step": 3331 }, { "epoch": 0.11769224995474394, "grad_norm": 1.6594867706298828, "learning_rate": 9.79978771271625e-06, "loss": 0.8671, "step": 3332 }, { "epoch": 0.11772757175845185, "grad_norm": 1.8305646181106567, "learning_rate": 9.799627435298213e-06, "loss": 0.8916, "step": 3333 }, { "epoch": 0.11776289356215976, "grad_norm": 1.6756833791732788, "learning_rate": 9.799467095063554e-06, "loss": 0.8528, "step": 3334 }, { "epoch": 0.11779821536586765, "grad_norm": 1.7184100151062012, "learning_rate": 9.799306692014374e-06, "loss": 0.8513, "step": 3335 }, { "epoch": 0.11783353716957556, "grad_norm": 1.718321681022644, "learning_rate": 9.79914622615277e-06, "loss": 0.8851, "step": 3336 }, { "epoch": 0.11786885897328347, "grad_norm": 1.8059170246124268, "learning_rate": 9.798985697480842e-06, "loss": 0.8733, "step": 3337 }, { "epoch": 0.11790418077699137, "grad_norm": 1.7853095531463623, "learning_rate": 9.798825106000692e-06, "loss": 0.8615, "step": 3338 }, { "epoch": 0.11793950258069928, "grad_norm": 1.7296812534332275, "learning_rate": 9.798664451714419e-06, "loss": 0.877, "step": 3339 }, { "epoch": 0.11797482438440719, "grad_norm": 1.819650411605835, "learning_rate": 9.798503734624129e-06, "loss": 0.8718, "step": 3340 }, { "epoch": 0.11801014618811509, "grad_norm": 1.7698864936828613, "learning_rate": 9.798342954731925e-06, "loss": 0.9293, "step": 3341 }, { "epoch": 0.118045467991823, "grad_norm": 2.0828425884246826, "learning_rate": 9.798182112039909e-06, "loss": 0.9425, "step": 3342 }, { "epoch": 0.11808078979553091, "grad_norm": 1.8206101655960083, "learning_rate": 9.798021206550188e-06, "loss": 0.9101, "step": 3343 }, { "epoch": 0.11811611159923882, "grad_norm": 1.8997361660003662, "learning_rate": 9.797860238264866e-06, "loss": 0.8685, "step": 3344 }, { "epoch": 0.11815143340294672, "grad_norm": 1.9447299242019653, "learning_rate": 9.797699207186053e-06, "loss": 0.9086, "step": 3345 }, { "epoch": 0.11818675520665463, "grad_norm": 1.7146942615509033, "learning_rate": 9.797538113315854e-06, "loss": 0.9063, "step": 3346 }, { "epoch": 0.11822207701036254, "grad_norm": 1.812296748161316, "learning_rate": 9.79737695665638e-06, "loss": 0.924, "step": 3347 }, { "epoch": 0.11825739881407044, "grad_norm": 1.8761669397354126, "learning_rate": 9.797215737209736e-06, "loss": 0.9033, "step": 3348 }, { "epoch": 0.11829272061777835, "grad_norm": 1.9520906209945679, "learning_rate": 9.797054454978034e-06, "loss": 0.8968, "step": 3349 }, { "epoch": 0.11832804242148626, "grad_norm": 1.7451722621917725, "learning_rate": 9.796893109963384e-06, "loss": 0.9034, "step": 3350 }, { "epoch": 0.11836336422519415, "grad_norm": 1.7786279916763306, "learning_rate": 9.796731702167901e-06, "loss": 0.9238, "step": 3351 }, { "epoch": 0.11839868602890206, "grad_norm": 1.7753173112869263, "learning_rate": 9.796570231593693e-06, "loss": 0.8753, "step": 3352 }, { "epoch": 0.11843400783260997, "grad_norm": 1.794559121131897, "learning_rate": 9.796408698242879e-06, "loss": 0.8608, "step": 3353 }, { "epoch": 0.11846932963631789, "grad_norm": 1.765667200088501, "learning_rate": 9.796247102117567e-06, "loss": 0.8755, "step": 3354 }, { "epoch": 0.11850465144002578, "grad_norm": 1.8728973865509033, "learning_rate": 9.796085443219874e-06, "loss": 0.8594, "step": 3355 }, { "epoch": 0.11853997324373369, "grad_norm": 1.8664450645446777, "learning_rate": 9.795923721551918e-06, "loss": 0.9221, "step": 3356 }, { "epoch": 0.1185752950474416, "grad_norm": 1.9321224689483643, "learning_rate": 9.795761937115813e-06, "loss": 0.8762, "step": 3357 }, { "epoch": 0.1186106168511495, "grad_norm": 1.6742162704467773, "learning_rate": 9.795600089913677e-06, "loss": 0.8854, "step": 3358 }, { "epoch": 0.11864593865485741, "grad_norm": 1.959401249885559, "learning_rate": 9.79543817994763e-06, "loss": 0.881, "step": 3359 }, { "epoch": 0.11868126045856532, "grad_norm": 2.332159996032715, "learning_rate": 9.795276207219787e-06, "loss": 0.8913, "step": 3360 }, { "epoch": 0.11871658226227322, "grad_norm": 1.8573095798492432, "learning_rate": 9.795114171732271e-06, "loss": 0.9086, "step": 3361 }, { "epoch": 0.11875190406598113, "grad_norm": 1.722132682800293, "learning_rate": 9.794952073487204e-06, "loss": 0.8928, "step": 3362 }, { "epoch": 0.11878722586968904, "grad_norm": 2.025437355041504, "learning_rate": 9.794789912486705e-06, "loss": 0.9097, "step": 3363 }, { "epoch": 0.11882254767339694, "grad_norm": 1.8731838464736938, "learning_rate": 9.794627688732896e-06, "loss": 0.9017, "step": 3364 }, { "epoch": 0.11885786947710485, "grad_norm": 2.1330955028533936, "learning_rate": 9.794465402227901e-06, "loss": 0.8815, "step": 3365 }, { "epoch": 0.11889319128081276, "grad_norm": 1.8150453567504883, "learning_rate": 9.794303052973846e-06, "loss": 0.9091, "step": 3366 }, { "epoch": 0.11892851308452067, "grad_norm": 2.2135133743286133, "learning_rate": 9.794140640972851e-06, "loss": 0.9116, "step": 3367 }, { "epoch": 0.11896383488822856, "grad_norm": 1.622301459312439, "learning_rate": 9.793978166227048e-06, "loss": 0.8669, "step": 3368 }, { "epoch": 0.11899915669193648, "grad_norm": 2.0417959690093994, "learning_rate": 9.793815628738557e-06, "loss": 0.8756, "step": 3369 }, { "epoch": 0.11903447849564439, "grad_norm": 1.8309770822525024, "learning_rate": 9.793653028509508e-06, "loss": 0.8753, "step": 3370 }, { "epoch": 0.11906980029935228, "grad_norm": 1.7452197074890137, "learning_rate": 9.793490365542032e-06, "loss": 0.8506, "step": 3371 }, { "epoch": 0.1191051221030602, "grad_norm": 1.7042051553726196, "learning_rate": 9.793327639838253e-06, "loss": 0.8987, "step": 3372 }, { "epoch": 0.1191404439067681, "grad_norm": 1.833768606185913, "learning_rate": 9.793164851400303e-06, "loss": 0.8505, "step": 3373 }, { "epoch": 0.119175765710476, "grad_norm": 1.8930721282958984, "learning_rate": 9.793002000230313e-06, "loss": 0.912, "step": 3374 }, { "epoch": 0.11921108751418391, "grad_norm": 1.6851613521575928, "learning_rate": 9.792839086330415e-06, "loss": 0.8874, "step": 3375 }, { "epoch": 0.11924640931789182, "grad_norm": 1.7443394660949707, "learning_rate": 9.792676109702738e-06, "loss": 0.8986, "step": 3376 }, { "epoch": 0.11928173112159972, "grad_norm": 1.688916802406311, "learning_rate": 9.792513070349417e-06, "loss": 0.8946, "step": 3377 }, { "epoch": 0.11931705292530763, "grad_norm": 2.7006990909576416, "learning_rate": 9.792349968272588e-06, "loss": 0.8688, "step": 3378 }, { "epoch": 0.11935237472901554, "grad_norm": 1.7812741994857788, "learning_rate": 9.792186803474382e-06, "loss": 0.8697, "step": 3379 }, { "epoch": 0.11938769653272345, "grad_norm": 1.9840128421783447, "learning_rate": 9.792023575956936e-06, "loss": 0.879, "step": 3380 }, { "epoch": 0.11942301833643135, "grad_norm": 2.597491502761841, "learning_rate": 9.791860285722386e-06, "loss": 0.879, "step": 3381 }, { "epoch": 0.11945834014013926, "grad_norm": 2.033602476119995, "learning_rate": 9.79169693277287e-06, "loss": 0.9184, "step": 3382 }, { "epoch": 0.11949366194384717, "grad_norm": 1.839597463607788, "learning_rate": 9.791533517110524e-06, "loss": 0.8959, "step": 3383 }, { "epoch": 0.11952898374755506, "grad_norm": 1.7710895538330078, "learning_rate": 9.791370038737489e-06, "loss": 0.9232, "step": 3384 }, { "epoch": 0.11956430555126298, "grad_norm": 1.7708041667938232, "learning_rate": 9.791206497655902e-06, "loss": 0.8707, "step": 3385 }, { "epoch": 0.11959962735497089, "grad_norm": 1.989189624786377, "learning_rate": 9.791042893867908e-06, "loss": 0.8955, "step": 3386 }, { "epoch": 0.11963494915867878, "grad_norm": 1.8214550018310547, "learning_rate": 9.790879227375643e-06, "loss": 0.8444, "step": 3387 }, { "epoch": 0.1196702709623867, "grad_norm": 1.711583137512207, "learning_rate": 9.790715498181253e-06, "loss": 0.9021, "step": 3388 }, { "epoch": 0.1197055927660946, "grad_norm": 1.7087888717651367, "learning_rate": 9.790551706286876e-06, "loss": 0.8451, "step": 3389 }, { "epoch": 0.1197409145698025, "grad_norm": 1.7256896495819092, "learning_rate": 9.790387851694662e-06, "loss": 0.8906, "step": 3390 }, { "epoch": 0.11977623637351041, "grad_norm": 3.0109927654266357, "learning_rate": 9.790223934406752e-06, "loss": 0.8912, "step": 3391 }, { "epoch": 0.11981155817721832, "grad_norm": 1.8351131677627563, "learning_rate": 9.790059954425291e-06, "loss": 0.8993, "step": 3392 }, { "epoch": 0.11984687998092623, "grad_norm": 1.9248625040054321, "learning_rate": 9.789895911752425e-06, "loss": 0.8923, "step": 3393 }, { "epoch": 0.11988220178463413, "grad_norm": 1.7739957571029663, "learning_rate": 9.789731806390303e-06, "loss": 0.869, "step": 3394 }, { "epoch": 0.11991752358834204, "grad_norm": 1.7392808198928833, "learning_rate": 9.789567638341071e-06, "loss": 0.8831, "step": 3395 }, { "epoch": 0.11995284539204995, "grad_norm": 1.7026734352111816, "learning_rate": 9.789403407606877e-06, "loss": 0.8646, "step": 3396 }, { "epoch": 0.11998816719575785, "grad_norm": 1.8562597036361694, "learning_rate": 9.789239114189873e-06, "loss": 0.8962, "step": 3397 }, { "epoch": 0.12002348899946576, "grad_norm": 1.8967455625534058, "learning_rate": 9.789074758092207e-06, "loss": 0.9041, "step": 3398 }, { "epoch": 0.12005881080317367, "grad_norm": 1.7979055643081665, "learning_rate": 9.788910339316031e-06, "loss": 0.8867, "step": 3399 }, { "epoch": 0.12009413260688157, "grad_norm": 1.8438063859939575, "learning_rate": 9.788745857863497e-06, "loss": 0.8926, "step": 3400 }, { "epoch": 0.12012945441058948, "grad_norm": 1.8002097606658936, "learning_rate": 9.788581313736756e-06, "loss": 0.8432, "step": 3401 }, { "epoch": 0.12016477621429739, "grad_norm": 1.9543745517730713, "learning_rate": 9.788416706937965e-06, "loss": 0.8777, "step": 3402 }, { "epoch": 0.12020009801800528, "grad_norm": 1.6773921251296997, "learning_rate": 9.788252037469274e-06, "loss": 0.8533, "step": 3403 }, { "epoch": 0.1202354198217132, "grad_norm": 1.6911749839782715, "learning_rate": 9.788087305332841e-06, "loss": 0.8816, "step": 3404 }, { "epoch": 0.1202707416254211, "grad_norm": 1.7981520891189575, "learning_rate": 9.787922510530823e-06, "loss": 0.8976, "step": 3405 }, { "epoch": 0.12030606342912901, "grad_norm": 1.6113908290863037, "learning_rate": 9.787757653065376e-06, "loss": 0.8847, "step": 3406 }, { "epoch": 0.12034138523283691, "grad_norm": 2.300532102584839, "learning_rate": 9.787592732938654e-06, "loss": 0.9147, "step": 3407 }, { "epoch": 0.12037670703654482, "grad_norm": 1.769169569015503, "learning_rate": 9.787427750152818e-06, "loss": 0.8646, "step": 3408 }, { "epoch": 0.12041202884025273, "grad_norm": 2.1000351905822754, "learning_rate": 9.78726270471003e-06, "loss": 0.9402, "step": 3409 }, { "epoch": 0.12044735064396063, "grad_norm": 1.626038908958435, "learning_rate": 9.787097596612447e-06, "loss": 0.8544, "step": 3410 }, { "epoch": 0.12048267244766854, "grad_norm": 2.1762468814849854, "learning_rate": 9.78693242586223e-06, "loss": 0.8661, "step": 3411 }, { "epoch": 0.12051799425137645, "grad_norm": 2.2270970344543457, "learning_rate": 9.786767192461541e-06, "loss": 0.8987, "step": 3412 }, { "epoch": 0.12055331605508435, "grad_norm": 1.6455014944076538, "learning_rate": 9.786601896412542e-06, "loss": 0.8811, "step": 3413 }, { "epoch": 0.12058863785879226, "grad_norm": 1.7353249788284302, "learning_rate": 9.786436537717398e-06, "loss": 0.878, "step": 3414 }, { "epoch": 0.12062395966250017, "grad_norm": 1.9643062353134155, "learning_rate": 9.786271116378274e-06, "loss": 0.8772, "step": 3415 }, { "epoch": 0.12065928146620807, "grad_norm": 1.6408613920211792, "learning_rate": 9.78610563239733e-06, "loss": 0.8328, "step": 3416 }, { "epoch": 0.12069460326991598, "grad_norm": 1.714219570159912, "learning_rate": 9.78594008577674e-06, "loss": 0.9161, "step": 3417 }, { "epoch": 0.12072992507362389, "grad_norm": 1.7134181261062622, "learning_rate": 9.785774476518662e-06, "loss": 0.8724, "step": 3418 }, { "epoch": 0.1207652468773318, "grad_norm": 2.0160105228424072, "learning_rate": 9.785608804625269e-06, "loss": 0.9061, "step": 3419 }, { "epoch": 0.1208005686810397, "grad_norm": 1.945053219795227, "learning_rate": 9.785443070098726e-06, "loss": 0.8812, "step": 3420 }, { "epoch": 0.1208358904847476, "grad_norm": 1.890068531036377, "learning_rate": 9.785277272941205e-06, "loss": 0.8623, "step": 3421 }, { "epoch": 0.12087121228845552, "grad_norm": 1.8245171308517456, "learning_rate": 9.785111413154873e-06, "loss": 0.8937, "step": 3422 }, { "epoch": 0.12090653409216341, "grad_norm": 1.9199198484420776, "learning_rate": 9.784945490741904e-06, "loss": 0.8806, "step": 3423 }, { "epoch": 0.12094185589587132, "grad_norm": 1.7587525844573975, "learning_rate": 9.784779505704468e-06, "loss": 0.9115, "step": 3424 }, { "epoch": 0.12097717769957923, "grad_norm": 1.7497379779815674, "learning_rate": 9.784613458044737e-06, "loss": 0.8633, "step": 3425 }, { "epoch": 0.12101249950328713, "grad_norm": 1.7651033401489258, "learning_rate": 9.784447347764883e-06, "loss": 0.8739, "step": 3426 }, { "epoch": 0.12104782130699504, "grad_norm": 1.9870200157165527, "learning_rate": 9.784281174867082e-06, "loss": 0.9125, "step": 3427 }, { "epoch": 0.12108314311070295, "grad_norm": 1.5341683626174927, "learning_rate": 9.784114939353508e-06, "loss": 0.8485, "step": 3428 }, { "epoch": 0.12111846491441085, "grad_norm": 1.651956558227539, "learning_rate": 9.78394864122634e-06, "loss": 0.8824, "step": 3429 }, { "epoch": 0.12115378671811876, "grad_norm": 1.7879204750061035, "learning_rate": 9.783782280487747e-06, "loss": 0.8822, "step": 3430 }, { "epoch": 0.12118910852182667, "grad_norm": 1.7928831577301025, "learning_rate": 9.783615857139914e-06, "loss": 0.8824, "step": 3431 }, { "epoch": 0.12122443032553458, "grad_norm": 1.7941240072250366, "learning_rate": 9.783449371185016e-06, "loss": 0.8739, "step": 3432 }, { "epoch": 0.12125975212924248, "grad_norm": 1.8750617504119873, "learning_rate": 9.783282822625231e-06, "loss": 0.8708, "step": 3433 }, { "epoch": 0.12129507393295039, "grad_norm": 1.86324942111969, "learning_rate": 9.783116211462738e-06, "loss": 0.9191, "step": 3434 }, { "epoch": 0.1213303957366583, "grad_norm": 1.7562884092330933, "learning_rate": 9.782949537699722e-06, "loss": 0.815, "step": 3435 }, { "epoch": 0.1213657175403662, "grad_norm": 1.7405507564544678, "learning_rate": 9.782782801338359e-06, "loss": 0.8589, "step": 3436 }, { "epoch": 0.1214010393440741, "grad_norm": 1.8436839580535889, "learning_rate": 9.782616002380834e-06, "loss": 0.8737, "step": 3437 }, { "epoch": 0.12143636114778202, "grad_norm": 1.7409604787826538, "learning_rate": 9.782449140829331e-06, "loss": 0.9117, "step": 3438 }, { "epoch": 0.12147168295148991, "grad_norm": 1.7582921981811523, "learning_rate": 9.782282216686033e-06, "loss": 0.8699, "step": 3439 }, { "epoch": 0.12150700475519782, "grad_norm": 1.7158241271972656, "learning_rate": 9.782115229953123e-06, "loss": 0.8809, "step": 3440 }, { "epoch": 0.12154232655890573, "grad_norm": 2.0480432510375977, "learning_rate": 9.781948180632788e-06, "loss": 0.9228, "step": 3441 }, { "epoch": 0.12157764836261363, "grad_norm": 1.9092377424240112, "learning_rate": 9.781781068727215e-06, "loss": 0.8936, "step": 3442 }, { "epoch": 0.12161297016632154, "grad_norm": 1.832180142402649, "learning_rate": 9.78161389423859e-06, "loss": 0.8877, "step": 3443 }, { "epoch": 0.12164829197002945, "grad_norm": 1.9895737171173096, "learning_rate": 9.781446657169101e-06, "loss": 0.8774, "step": 3444 }, { "epoch": 0.12168361377373736, "grad_norm": 1.7931041717529297, "learning_rate": 9.781279357520937e-06, "loss": 0.8782, "step": 3445 }, { "epoch": 0.12171893557744526, "grad_norm": 1.7934250831604004, "learning_rate": 9.781111995296287e-06, "loss": 0.8366, "step": 3446 }, { "epoch": 0.12175425738115317, "grad_norm": 1.7573609352111816, "learning_rate": 9.78094457049734e-06, "loss": 0.8695, "step": 3447 }, { "epoch": 0.12178957918486108, "grad_norm": 1.815329909324646, "learning_rate": 9.780777083126294e-06, "loss": 0.8747, "step": 3448 }, { "epoch": 0.12182490098856898, "grad_norm": 1.6735929250717163, "learning_rate": 9.780609533185335e-06, "loss": 0.845, "step": 3449 }, { "epoch": 0.12186022279227689, "grad_norm": 1.7375155687332153, "learning_rate": 9.780441920676656e-06, "loss": 0.8851, "step": 3450 }, { "epoch": 0.1218955445959848, "grad_norm": 1.777306079864502, "learning_rate": 9.78027424560245e-06, "loss": 0.8924, "step": 3451 }, { "epoch": 0.1219308663996927, "grad_norm": 1.8882004022598267, "learning_rate": 9.780106507964914e-06, "loss": 0.8624, "step": 3452 }, { "epoch": 0.1219661882034006, "grad_norm": 1.962626338005066, "learning_rate": 9.779938707766245e-06, "loss": 0.8999, "step": 3453 }, { "epoch": 0.12200151000710852, "grad_norm": 1.8804960250854492, "learning_rate": 9.779770845008635e-06, "loss": 0.8757, "step": 3454 }, { "epoch": 0.12203683181081641, "grad_norm": 1.981655478477478, "learning_rate": 9.779602919694284e-06, "loss": 0.8768, "step": 3455 }, { "epoch": 0.12207215361452432, "grad_norm": 2.1676642894744873, "learning_rate": 9.779434931825387e-06, "loss": 0.8891, "step": 3456 }, { "epoch": 0.12210747541823223, "grad_norm": 1.8794313669204712, "learning_rate": 9.779266881404143e-06, "loss": 0.8394, "step": 3457 }, { "epoch": 0.12214279722194014, "grad_norm": 1.9805524349212646, "learning_rate": 9.779098768432756e-06, "loss": 0.8539, "step": 3458 }, { "epoch": 0.12217811902564804, "grad_norm": 1.8783271312713623, "learning_rate": 9.77893059291342e-06, "loss": 0.8882, "step": 3459 }, { "epoch": 0.12221344082935595, "grad_norm": 1.608127474784851, "learning_rate": 9.778762354848339e-06, "loss": 0.8617, "step": 3460 }, { "epoch": 0.12224876263306386, "grad_norm": 8.333426475524902, "learning_rate": 9.778594054239714e-06, "loss": 0.8933, "step": 3461 }, { "epoch": 0.12228408443677176, "grad_norm": 2.095229387283325, "learning_rate": 9.77842569108975e-06, "loss": 0.8526, "step": 3462 }, { "epoch": 0.12231940624047967, "grad_norm": 1.7457664012908936, "learning_rate": 9.778257265400649e-06, "loss": 0.9046, "step": 3463 }, { "epoch": 0.12235472804418758, "grad_norm": 1.9021257162094116, "learning_rate": 9.778088777174612e-06, "loss": 0.867, "step": 3464 }, { "epoch": 0.12239004984789548, "grad_norm": 1.7404319047927856, "learning_rate": 9.777920226413851e-06, "loss": 0.8675, "step": 3465 }, { "epoch": 0.12242537165160339, "grad_norm": 1.9787884950637817, "learning_rate": 9.777751613120564e-06, "loss": 0.9042, "step": 3466 }, { "epoch": 0.1224606934553113, "grad_norm": 1.702858567237854, "learning_rate": 9.777582937296966e-06, "loss": 0.8861, "step": 3467 }, { "epoch": 0.12249601525901921, "grad_norm": 1.9173877239227295, "learning_rate": 9.77741419894526e-06, "loss": 0.9198, "step": 3468 }, { "epoch": 0.1225313370627271, "grad_norm": 1.9553227424621582, "learning_rate": 9.777245398067653e-06, "loss": 0.9003, "step": 3469 }, { "epoch": 0.12256665886643502, "grad_norm": 1.9143173694610596, "learning_rate": 9.777076534666359e-06, "loss": 0.8779, "step": 3470 }, { "epoch": 0.12260198067014293, "grad_norm": 1.8161306381225586, "learning_rate": 9.776907608743584e-06, "loss": 0.8704, "step": 3471 }, { "epoch": 0.12263730247385082, "grad_norm": 2.516998767852783, "learning_rate": 9.776738620301541e-06, "loss": 0.8681, "step": 3472 }, { "epoch": 0.12267262427755873, "grad_norm": 1.8757902383804321, "learning_rate": 9.776569569342437e-06, "loss": 0.8887, "step": 3473 }, { "epoch": 0.12270794608126664, "grad_norm": 2.1596240997314453, "learning_rate": 9.776400455868494e-06, "loss": 0.8897, "step": 3474 }, { "epoch": 0.12274326788497454, "grad_norm": 1.657623529434204, "learning_rate": 9.776231279881916e-06, "loss": 0.9348, "step": 3475 }, { "epoch": 0.12277858968868245, "grad_norm": 2.247170925140381, "learning_rate": 9.776062041384922e-06, "loss": 0.8852, "step": 3476 }, { "epoch": 0.12281391149239036, "grad_norm": 1.9658832550048828, "learning_rate": 9.775892740379724e-06, "loss": 0.8981, "step": 3477 }, { "epoch": 0.12284923329609826, "grad_norm": 1.9148004055023193, "learning_rate": 9.775723376868543e-06, "loss": 0.8608, "step": 3478 }, { "epoch": 0.12288455509980617, "grad_norm": 1.752835988998413, "learning_rate": 9.775553950853589e-06, "loss": 0.8708, "step": 3479 }, { "epoch": 0.12291987690351408, "grad_norm": 1.831687331199646, "learning_rate": 9.775384462337084e-06, "loss": 0.8862, "step": 3480 }, { "epoch": 0.12295519870722199, "grad_norm": 1.9530953168869019, "learning_rate": 9.775214911321244e-06, "loss": 0.8946, "step": 3481 }, { "epoch": 0.12299052051092989, "grad_norm": 1.703466534614563, "learning_rate": 9.77504529780829e-06, "loss": 0.8672, "step": 3482 }, { "epoch": 0.1230258423146378, "grad_norm": 1.747099757194519, "learning_rate": 9.77487562180044e-06, "loss": 0.8747, "step": 3483 }, { "epoch": 0.12306116411834571, "grad_norm": 2.1015267372131348, "learning_rate": 9.774705883299918e-06, "loss": 0.8667, "step": 3484 }, { "epoch": 0.1230964859220536, "grad_norm": 1.8502804040908813, "learning_rate": 9.77453608230894e-06, "loss": 0.8735, "step": 3485 }, { "epoch": 0.12313180772576152, "grad_norm": 1.9413480758666992, "learning_rate": 9.774366218829731e-06, "loss": 0.9171, "step": 3486 }, { "epoch": 0.12316712952946943, "grad_norm": 1.6634267568588257, "learning_rate": 9.774196292864516e-06, "loss": 0.8969, "step": 3487 }, { "epoch": 0.12320245133317732, "grad_norm": 1.7266364097595215, "learning_rate": 9.774026304415518e-06, "loss": 0.8341, "step": 3488 }, { "epoch": 0.12323777313688523, "grad_norm": 1.77049720287323, "learning_rate": 9.77385625348496e-06, "loss": 0.8715, "step": 3489 }, { "epoch": 0.12327309494059314, "grad_norm": 1.7985378503799438, "learning_rate": 9.773686140075068e-06, "loss": 0.8854, "step": 3490 }, { "epoch": 0.12330841674430104, "grad_norm": 1.989953637123108, "learning_rate": 9.77351596418807e-06, "loss": 0.917, "step": 3491 }, { "epoch": 0.12334373854800895, "grad_norm": 1.7692512273788452, "learning_rate": 9.773345725826192e-06, "loss": 0.8728, "step": 3492 }, { "epoch": 0.12337906035171686, "grad_norm": 1.787928819656372, "learning_rate": 9.773175424991663e-06, "loss": 0.8963, "step": 3493 }, { "epoch": 0.12341438215542477, "grad_norm": 1.8809692859649658, "learning_rate": 9.77300506168671e-06, "loss": 0.8702, "step": 3494 }, { "epoch": 0.12344970395913267, "grad_norm": 1.8706434965133667, "learning_rate": 9.772834635913567e-06, "loss": 0.8966, "step": 3495 }, { "epoch": 0.12348502576284058, "grad_norm": 1.8063544034957886, "learning_rate": 9.772664147674457e-06, "loss": 0.8619, "step": 3496 }, { "epoch": 0.12352034756654849, "grad_norm": 1.900991439819336, "learning_rate": 9.772493596971619e-06, "loss": 0.8922, "step": 3497 }, { "epoch": 0.12355566937025639, "grad_norm": 2.0824174880981445, "learning_rate": 9.772322983807282e-06, "loss": 0.8523, "step": 3498 }, { "epoch": 0.1235909911739643, "grad_norm": 1.687606692314148, "learning_rate": 9.772152308183678e-06, "loss": 0.8883, "step": 3499 }, { "epoch": 0.12362631297767221, "grad_norm": 1.7766591310501099, "learning_rate": 9.771981570103042e-06, "loss": 0.8861, "step": 3500 }, { "epoch": 0.1236616347813801, "grad_norm": 1.9855860471725464, "learning_rate": 9.771810769567608e-06, "loss": 0.8472, "step": 3501 }, { "epoch": 0.12369695658508802, "grad_norm": 1.7440611124038696, "learning_rate": 9.77163990657961e-06, "loss": 0.8631, "step": 3502 }, { "epoch": 0.12373227838879593, "grad_norm": 1.925026297569275, "learning_rate": 9.771468981141289e-06, "loss": 0.87, "step": 3503 }, { "epoch": 0.12376760019250382, "grad_norm": 1.6731021404266357, "learning_rate": 9.771297993254879e-06, "loss": 0.8788, "step": 3504 }, { "epoch": 0.12380292199621173, "grad_norm": 1.75933837890625, "learning_rate": 9.771126942922615e-06, "loss": 0.8712, "step": 3505 }, { "epoch": 0.12383824379991965, "grad_norm": 1.5844985246658325, "learning_rate": 9.77095583014674e-06, "loss": 0.8815, "step": 3506 }, { "epoch": 0.12387356560362756, "grad_norm": 1.9164574146270752, "learning_rate": 9.770784654929491e-06, "loss": 0.8967, "step": 3507 }, { "epoch": 0.12390888740733545, "grad_norm": 1.7033028602600098, "learning_rate": 9.770613417273109e-06, "loss": 0.8799, "step": 3508 }, { "epoch": 0.12394420921104336, "grad_norm": 1.7919844388961792, "learning_rate": 9.770442117179838e-06, "loss": 0.8887, "step": 3509 }, { "epoch": 0.12397953101475127, "grad_norm": 1.6512757539749146, "learning_rate": 9.770270754651914e-06, "loss": 0.8652, "step": 3510 }, { "epoch": 0.12401485281845917, "grad_norm": 1.9420021772384644, "learning_rate": 9.770099329691585e-06, "loss": 0.8749, "step": 3511 }, { "epoch": 0.12405017462216708, "grad_norm": 1.8425322771072388, "learning_rate": 9.769927842301093e-06, "loss": 0.8709, "step": 3512 }, { "epoch": 0.12408549642587499, "grad_norm": 1.7066326141357422, "learning_rate": 9.769756292482681e-06, "loss": 0.8421, "step": 3513 }, { "epoch": 0.12412081822958289, "grad_norm": 1.9367438554763794, "learning_rate": 9.769584680238596e-06, "loss": 0.8608, "step": 3514 }, { "epoch": 0.1241561400332908, "grad_norm": 1.851083755493164, "learning_rate": 9.769413005571082e-06, "loss": 0.8774, "step": 3515 }, { "epoch": 0.12419146183699871, "grad_norm": 3.1880035400390625, "learning_rate": 9.769241268482388e-06, "loss": 0.9074, "step": 3516 }, { "epoch": 0.1242267836407066, "grad_norm": 1.6234804391860962, "learning_rate": 9.769069468974761e-06, "loss": 0.879, "step": 3517 }, { "epoch": 0.12426210544441452, "grad_norm": 1.7857773303985596, "learning_rate": 9.768897607050449e-06, "loss": 0.8986, "step": 3518 }, { "epoch": 0.12429742724812243, "grad_norm": 1.7257013320922852, "learning_rate": 9.768725682711702e-06, "loss": 0.8587, "step": 3519 }, { "epoch": 0.12433274905183034, "grad_norm": 1.7475093603134155, "learning_rate": 9.768553695960769e-06, "loss": 0.8942, "step": 3520 }, { "epoch": 0.12436807085553823, "grad_norm": 1.6849778890609741, "learning_rate": 9.7683816467999e-06, "loss": 0.8575, "step": 3521 }, { "epoch": 0.12440339265924615, "grad_norm": 1.666132926940918, "learning_rate": 9.768209535231353e-06, "loss": 0.8951, "step": 3522 }, { "epoch": 0.12443871446295406, "grad_norm": 1.6209551095962524, "learning_rate": 9.768037361257372e-06, "loss": 0.8705, "step": 3523 }, { "epoch": 0.12447403626666195, "grad_norm": 1.8836517333984375, "learning_rate": 9.767865124880217e-06, "loss": 0.8612, "step": 3524 }, { "epoch": 0.12450935807036986, "grad_norm": 1.8537986278533936, "learning_rate": 9.767692826102136e-06, "loss": 0.8917, "step": 3525 }, { "epoch": 0.12454467987407777, "grad_norm": 1.789312481880188, "learning_rate": 9.76752046492539e-06, "loss": 0.8623, "step": 3526 }, { "epoch": 0.12458000167778567, "grad_norm": 1.8655041456222534, "learning_rate": 9.767348041352232e-06, "loss": 0.8611, "step": 3527 }, { "epoch": 0.12461532348149358, "grad_norm": 1.6160399913787842, "learning_rate": 9.767175555384918e-06, "loss": 0.8577, "step": 3528 }, { "epoch": 0.12465064528520149, "grad_norm": 1.7383683919906616, "learning_rate": 9.767003007025708e-06, "loss": 0.8841, "step": 3529 }, { "epoch": 0.12468596708890939, "grad_norm": 1.700331449508667, "learning_rate": 9.766830396276858e-06, "loss": 0.8353, "step": 3530 }, { "epoch": 0.1247212888926173, "grad_norm": 1.7755051851272583, "learning_rate": 9.766657723140628e-06, "loss": 0.9072, "step": 3531 }, { "epoch": 0.12475661069632521, "grad_norm": 1.8249640464782715, "learning_rate": 9.766484987619278e-06, "loss": 0.8909, "step": 3532 }, { "epoch": 0.12479193250003312, "grad_norm": 1.6046719551086426, "learning_rate": 9.766312189715068e-06, "loss": 0.8943, "step": 3533 }, { "epoch": 0.12482725430374102, "grad_norm": 1.8786665201187134, "learning_rate": 9.766139329430259e-06, "loss": 0.9292, "step": 3534 }, { "epoch": 0.12486257610744893, "grad_norm": 1.790678858757019, "learning_rate": 9.765966406767117e-06, "loss": 0.8768, "step": 3535 }, { "epoch": 0.12489789791115684, "grad_norm": 1.9741859436035156, "learning_rate": 9.7657934217279e-06, "loss": 0.8757, "step": 3536 }, { "epoch": 0.12493321971486473, "grad_norm": 1.7432658672332764, "learning_rate": 9.765620374314877e-06, "loss": 0.8796, "step": 3537 }, { "epoch": 0.12496854151857265, "grad_norm": 1.5841857194900513, "learning_rate": 9.765447264530309e-06, "loss": 0.8494, "step": 3538 }, { "epoch": 0.12500386332228056, "grad_norm": 1.666174292564392, "learning_rate": 9.765274092376463e-06, "loss": 0.8818, "step": 3539 }, { "epoch": 0.12503918512598847, "grad_norm": 1.7109792232513428, "learning_rate": 9.765100857855605e-06, "loss": 0.8668, "step": 3540 }, { "epoch": 0.12507450692969638, "grad_norm": 1.9101104736328125, "learning_rate": 9.764927560970003e-06, "loss": 0.8805, "step": 3541 }, { "epoch": 0.12510982873340426, "grad_norm": 1.7188148498535156, "learning_rate": 9.764754201721927e-06, "loss": 0.8938, "step": 3542 }, { "epoch": 0.12514515053711217, "grad_norm": 1.9902229309082031, "learning_rate": 9.764580780113641e-06, "loss": 0.8599, "step": 3543 }, { "epoch": 0.12518047234082008, "grad_norm": 1.8226590156555176, "learning_rate": 9.764407296147418e-06, "loss": 0.8803, "step": 3544 }, { "epoch": 0.125215794144528, "grad_norm": 1.6767476797103882, "learning_rate": 9.764233749825528e-06, "loss": 0.8949, "step": 3545 }, { "epoch": 0.1252511159482359, "grad_norm": 1.7217003107070923, "learning_rate": 9.764060141150242e-06, "loss": 0.8819, "step": 3546 }, { "epoch": 0.1252864377519438, "grad_norm": 1.857093095779419, "learning_rate": 9.763886470123833e-06, "loss": 0.9106, "step": 3547 }, { "epoch": 0.1253217595556517, "grad_norm": 1.7950693368911743, "learning_rate": 9.763712736748574e-06, "loss": 0.8437, "step": 3548 }, { "epoch": 0.1253570813593596, "grad_norm": 1.614937663078308, "learning_rate": 9.763538941026736e-06, "loss": 0.8855, "step": 3549 }, { "epoch": 0.12539240316306752, "grad_norm": 1.7751504182815552, "learning_rate": 9.763365082960597e-06, "loss": 0.8823, "step": 3550 }, { "epoch": 0.12542772496677543, "grad_norm": 1.6319575309753418, "learning_rate": 9.763191162552433e-06, "loss": 0.895, "step": 3551 }, { "epoch": 0.12546304677048334, "grad_norm": 2.765211582183838, "learning_rate": 9.763017179804516e-06, "loss": 0.862, "step": 3552 }, { "epoch": 0.12549836857419125, "grad_norm": 1.751434326171875, "learning_rate": 9.762843134719127e-06, "loss": 0.8978, "step": 3553 }, { "epoch": 0.12553369037789916, "grad_norm": 1.9706947803497314, "learning_rate": 9.762669027298541e-06, "loss": 0.8815, "step": 3554 }, { "epoch": 0.12556901218160704, "grad_norm": 1.6903071403503418, "learning_rate": 9.762494857545038e-06, "loss": 0.8931, "step": 3555 }, { "epoch": 0.12560433398531495, "grad_norm": 1.6772336959838867, "learning_rate": 9.7623206254609e-06, "loss": 0.8635, "step": 3556 }, { "epoch": 0.12563965578902286, "grad_norm": 1.7849159240722656, "learning_rate": 9.762146331048401e-06, "loss": 0.8894, "step": 3557 }, { "epoch": 0.12567497759273077, "grad_norm": 1.8373618125915527, "learning_rate": 9.76197197430983e-06, "loss": 0.8786, "step": 3558 }, { "epoch": 0.12571029939643869, "grad_norm": 1.6933265924453735, "learning_rate": 9.761797555247462e-06, "loss": 0.8508, "step": 3559 }, { "epoch": 0.1257456212001466, "grad_norm": 1.7358993291854858, "learning_rate": 9.761623073863585e-06, "loss": 0.9035, "step": 3560 }, { "epoch": 0.12578094300385448, "grad_norm": 1.9274606704711914, "learning_rate": 9.761448530160477e-06, "loss": 0.909, "step": 3561 }, { "epoch": 0.1258162648075624, "grad_norm": 1.8628568649291992, "learning_rate": 9.761273924140429e-06, "loss": 0.8771, "step": 3562 }, { "epoch": 0.1258515866112703, "grad_norm": 1.9942141771316528, "learning_rate": 9.761099255805721e-06, "loss": 0.8587, "step": 3563 }, { "epoch": 0.1258869084149782, "grad_norm": 1.8383625745773315, "learning_rate": 9.76092452515864e-06, "loss": 0.8889, "step": 3564 }, { "epoch": 0.12592223021868612, "grad_norm": 1.0208333730697632, "learning_rate": 9.760749732201477e-06, "loss": 0.582, "step": 3565 }, { "epoch": 0.12595755202239403, "grad_norm": 1.9726033210754395, "learning_rate": 9.760574876936515e-06, "loss": 0.8483, "step": 3566 }, { "epoch": 0.12599287382610194, "grad_norm": 2.754286527633667, "learning_rate": 9.760399959366045e-06, "loss": 0.8789, "step": 3567 }, { "epoch": 0.12602819562980982, "grad_norm": 2.0496792793273926, "learning_rate": 9.760224979492353e-06, "loss": 0.8651, "step": 3568 }, { "epoch": 0.12606351743351774, "grad_norm": 1.9613641500473022, "learning_rate": 9.760049937317732e-06, "loss": 0.8701, "step": 3569 }, { "epoch": 0.12609883923722565, "grad_norm": 1.636734962463379, "learning_rate": 9.759874832844473e-06, "loss": 0.8957, "step": 3570 }, { "epoch": 0.12613416104093356, "grad_norm": 1.1044384241104126, "learning_rate": 9.759699666074866e-06, "loss": 0.5871, "step": 3571 }, { "epoch": 0.12616948284464147, "grad_norm": 5.489566802978516, "learning_rate": 9.759524437011205e-06, "loss": 0.8914, "step": 3572 }, { "epoch": 0.12620480464834938, "grad_norm": 1.9605908393859863, "learning_rate": 9.759349145655783e-06, "loss": 0.8572, "step": 3573 }, { "epoch": 0.12624012645205726, "grad_norm": 1.9702950716018677, "learning_rate": 9.759173792010892e-06, "loss": 0.8799, "step": 3574 }, { "epoch": 0.12627544825576517, "grad_norm": 2.0819923877716064, "learning_rate": 9.758998376078833e-06, "loss": 0.8597, "step": 3575 }, { "epoch": 0.12631077005947308, "grad_norm": 1.9671117067337036, "learning_rate": 9.758822897861894e-06, "loss": 0.8649, "step": 3576 }, { "epoch": 0.126346091863181, "grad_norm": 1.9910486936569214, "learning_rate": 9.758647357362379e-06, "loss": 0.8689, "step": 3577 }, { "epoch": 0.1263814136668889, "grad_norm": 2.0798654556274414, "learning_rate": 9.75847175458258e-06, "loss": 0.8946, "step": 3578 }, { "epoch": 0.1264167354705968, "grad_norm": 1.946618676185608, "learning_rate": 9.758296089524796e-06, "loss": 0.8442, "step": 3579 }, { "epoch": 0.12645205727430472, "grad_norm": 1.7245211601257324, "learning_rate": 9.758120362191328e-06, "loss": 0.8437, "step": 3580 }, { "epoch": 0.1264873790780126, "grad_norm": 1.8047815561294556, "learning_rate": 9.757944572584476e-06, "loss": 0.8602, "step": 3581 }, { "epoch": 0.12652270088172052, "grad_norm": 2.4372196197509766, "learning_rate": 9.757768720706539e-06, "loss": 0.8957, "step": 3582 }, { "epoch": 0.12655802268542843, "grad_norm": 1.9336950778961182, "learning_rate": 9.75759280655982e-06, "loss": 0.8763, "step": 3583 }, { "epoch": 0.12659334448913634, "grad_norm": 1.645517110824585, "learning_rate": 9.75741683014662e-06, "loss": 0.8456, "step": 3584 }, { "epoch": 0.12662866629284425, "grad_norm": 1.8884880542755127, "learning_rate": 9.757240791469244e-06, "loss": 0.8628, "step": 3585 }, { "epoch": 0.12666398809655216, "grad_norm": 2.002194404602051, "learning_rate": 9.757064690529993e-06, "loss": 0.8882, "step": 3586 }, { "epoch": 0.12669930990026004, "grad_norm": 1.7391291856765747, "learning_rate": 9.756888527331176e-06, "loss": 0.8725, "step": 3587 }, { "epoch": 0.12673463170396795, "grad_norm": 1.825221061706543, "learning_rate": 9.756712301875094e-06, "loss": 0.9032, "step": 3588 }, { "epoch": 0.12676995350767586, "grad_norm": 2.147611618041992, "learning_rate": 9.756536014164057e-06, "loss": 0.8577, "step": 3589 }, { "epoch": 0.12680527531138377, "grad_norm": 1.8152990341186523, "learning_rate": 9.75635966420037e-06, "loss": 0.8417, "step": 3590 }, { "epoch": 0.12684059711509169, "grad_norm": 1.9219722747802734, "learning_rate": 9.756183251986342e-06, "loss": 0.8598, "step": 3591 }, { "epoch": 0.1268759189187996, "grad_norm": 1.7525168657302856, "learning_rate": 9.756006777524281e-06, "loss": 0.8819, "step": 3592 }, { "epoch": 0.1269112407225075, "grad_norm": 1.8801792860031128, "learning_rate": 9.755830240816498e-06, "loss": 0.8395, "step": 3593 }, { "epoch": 0.1269465625262154, "grad_norm": 1.6518467664718628, "learning_rate": 9.755653641865303e-06, "loss": 0.8819, "step": 3594 }, { "epoch": 0.1269818843299233, "grad_norm": 1.771705985069275, "learning_rate": 9.755476980673006e-06, "loss": 0.8499, "step": 3595 }, { "epoch": 0.1270172061336312, "grad_norm": 1.8745908737182617, "learning_rate": 9.755300257241922e-06, "loss": 0.8551, "step": 3596 }, { "epoch": 0.12705252793733912, "grad_norm": 1.767012357711792, "learning_rate": 9.75512347157436e-06, "loss": 0.8855, "step": 3597 }, { "epoch": 0.12708784974104703, "grad_norm": 1.7130610942840576, "learning_rate": 9.754946623672637e-06, "loss": 0.8529, "step": 3598 }, { "epoch": 0.12712317154475494, "grad_norm": 1.8067736625671387, "learning_rate": 9.754769713539068e-06, "loss": 0.8956, "step": 3599 }, { "epoch": 0.12715849334846283, "grad_norm": 1.714902400970459, "learning_rate": 9.754592741175965e-06, "loss": 0.8717, "step": 3600 }, { "epoch": 0.12719381515217074, "grad_norm": 2.1710238456726074, "learning_rate": 9.754415706585646e-06, "loss": 0.8506, "step": 3601 }, { "epoch": 0.12722913695587865, "grad_norm": 1.8712023496627808, "learning_rate": 9.754238609770428e-06, "loss": 0.8992, "step": 3602 }, { "epoch": 0.12726445875958656, "grad_norm": 1.7561817169189453, "learning_rate": 9.754061450732628e-06, "loss": 0.8902, "step": 3603 }, { "epoch": 0.12729978056329447, "grad_norm": 1.7456154823303223, "learning_rate": 9.753884229474566e-06, "loss": 0.8973, "step": 3604 }, { "epoch": 0.12733510236700238, "grad_norm": 1.8369076251983643, "learning_rate": 9.753706945998562e-06, "loss": 0.871, "step": 3605 }, { "epoch": 0.1273704241707103, "grad_norm": 1.7916367053985596, "learning_rate": 9.753529600306934e-06, "loss": 0.8765, "step": 3606 }, { "epoch": 0.12740574597441817, "grad_norm": 1.8617585897445679, "learning_rate": 9.753352192402005e-06, "loss": 0.8998, "step": 3607 }, { "epoch": 0.12744106777812608, "grad_norm": 1.6974409818649292, "learning_rate": 9.753174722286094e-06, "loss": 0.8789, "step": 3608 }, { "epoch": 0.127476389581834, "grad_norm": 1.9057292938232422, "learning_rate": 9.752997189961528e-06, "loss": 0.8381, "step": 3609 }, { "epoch": 0.1275117113855419, "grad_norm": 1.932363510131836, "learning_rate": 9.752819595430627e-06, "loss": 0.8851, "step": 3610 }, { "epoch": 0.12754703318924981, "grad_norm": 1.7017003297805786, "learning_rate": 9.752641938695716e-06, "loss": 0.8709, "step": 3611 }, { "epoch": 0.12758235499295772, "grad_norm": 2.078080177307129, "learning_rate": 9.752464219759124e-06, "loss": 0.8848, "step": 3612 }, { "epoch": 0.12761767679666564, "grad_norm": 2.1007871627807617, "learning_rate": 9.75228643862317e-06, "loss": 0.8713, "step": 3613 }, { "epoch": 0.12765299860037352, "grad_norm": 1.9048422574996948, "learning_rate": 9.752108595290186e-06, "loss": 0.8556, "step": 3614 }, { "epoch": 0.12768832040408143, "grad_norm": 1.6671017408370972, "learning_rate": 9.7519306897625e-06, "loss": 0.8716, "step": 3615 }, { "epoch": 0.12772364220778934, "grad_norm": 1.8576428890228271, "learning_rate": 9.751752722042435e-06, "loss": 0.9028, "step": 3616 }, { "epoch": 0.12775896401149725, "grad_norm": 1.9158501625061035, "learning_rate": 9.751574692132324e-06, "loss": 0.875, "step": 3617 }, { "epoch": 0.12779428581520516, "grad_norm": 1.7332537174224854, "learning_rate": 9.751396600034498e-06, "loss": 0.8875, "step": 3618 }, { "epoch": 0.12782960761891307, "grad_norm": 2.067656993865967, "learning_rate": 9.751218445751285e-06, "loss": 0.8668, "step": 3619 }, { "epoch": 0.12786492942262095, "grad_norm": 1.8070513010025024, "learning_rate": 9.751040229285019e-06, "loss": 0.8988, "step": 3620 }, { "epoch": 0.12790025122632886, "grad_norm": 1.7145055532455444, "learning_rate": 9.750861950638031e-06, "loss": 0.8536, "step": 3621 }, { "epoch": 0.12793557303003678, "grad_norm": 1.6999515295028687, "learning_rate": 9.750683609812656e-06, "loss": 0.8662, "step": 3622 }, { "epoch": 0.12797089483374469, "grad_norm": 2.411557197570801, "learning_rate": 9.750505206811227e-06, "loss": 0.8795, "step": 3623 }, { "epoch": 0.1280062166374526, "grad_norm": 1.6769092082977295, "learning_rate": 9.750326741636079e-06, "loss": 0.8962, "step": 3624 }, { "epoch": 0.1280415384411605, "grad_norm": 1.8378639221191406, "learning_rate": 9.750148214289545e-06, "loss": 0.9144, "step": 3625 }, { "epoch": 0.12807686024486842, "grad_norm": 1.7961093187332153, "learning_rate": 9.749969624773967e-06, "loss": 0.8331, "step": 3626 }, { "epoch": 0.1281121820485763, "grad_norm": 1.618138313293457, "learning_rate": 9.749790973091677e-06, "loss": 0.869, "step": 3627 }, { "epoch": 0.1281475038522842, "grad_norm": 1.608733892440796, "learning_rate": 9.749612259245018e-06, "loss": 0.8595, "step": 3628 }, { "epoch": 0.12818282565599212, "grad_norm": 1.70088529586792, "learning_rate": 9.749433483236325e-06, "loss": 0.8612, "step": 3629 }, { "epoch": 0.12821814745970003, "grad_norm": 1.642261266708374, "learning_rate": 9.74925464506794e-06, "loss": 0.8834, "step": 3630 }, { "epoch": 0.12825346926340794, "grad_norm": 1.861010193824768, "learning_rate": 9.749075744742204e-06, "loss": 0.8761, "step": 3631 }, { "epoch": 0.12828879106711585, "grad_norm": 1.853089690208435, "learning_rate": 9.748896782261456e-06, "loss": 0.844, "step": 3632 }, { "epoch": 0.12832411287082374, "grad_norm": 1.783234715461731, "learning_rate": 9.748717757628039e-06, "loss": 0.8965, "step": 3633 }, { "epoch": 0.12835943467453165, "grad_norm": 1.6945797204971313, "learning_rate": 9.7485386708443e-06, "loss": 0.8632, "step": 3634 }, { "epoch": 0.12839475647823956, "grad_norm": 1.654286503791809, "learning_rate": 9.748359521912575e-06, "loss": 0.9157, "step": 3635 }, { "epoch": 0.12843007828194747, "grad_norm": 1.7661595344543457, "learning_rate": 9.748180310835216e-06, "loss": 0.8757, "step": 3636 }, { "epoch": 0.12846540008565538, "grad_norm": 1.7077198028564453, "learning_rate": 9.748001037614565e-06, "loss": 0.8544, "step": 3637 }, { "epoch": 0.1285007218893633, "grad_norm": 1.6579240560531616, "learning_rate": 9.747821702252967e-06, "loss": 0.8769, "step": 3638 }, { "epoch": 0.1285360436930712, "grad_norm": 1.7610466480255127, "learning_rate": 9.747642304752774e-06, "loss": 0.8753, "step": 3639 }, { "epoch": 0.12857136549677908, "grad_norm": 1.1230297088623047, "learning_rate": 9.747462845116329e-06, "loss": 0.6012, "step": 3640 }, { "epoch": 0.128606687300487, "grad_norm": 1.7003371715545654, "learning_rate": 9.747283323345984e-06, "loss": 0.8564, "step": 3641 }, { "epoch": 0.1286420091041949, "grad_norm": 1.732524037361145, "learning_rate": 9.747103739444086e-06, "loss": 0.8595, "step": 3642 }, { "epoch": 0.12867733090790281, "grad_norm": 0.9978538751602173, "learning_rate": 9.746924093412988e-06, "loss": 0.6007, "step": 3643 }, { "epoch": 0.12871265271161073, "grad_norm": 1.947309136390686, "learning_rate": 9.74674438525504e-06, "loss": 0.866, "step": 3644 }, { "epoch": 0.12874797451531864, "grad_norm": 1.751338005065918, "learning_rate": 9.746564614972592e-06, "loss": 0.8544, "step": 3645 }, { "epoch": 0.12878329631902652, "grad_norm": 1.8706837892532349, "learning_rate": 9.746384782567999e-06, "loss": 0.8792, "step": 3646 }, { "epoch": 0.12881861812273443, "grad_norm": 1.8041601181030273, "learning_rate": 9.746204888043615e-06, "loss": 0.9437, "step": 3647 }, { "epoch": 0.12885393992644234, "grad_norm": 1.6092103719711304, "learning_rate": 9.746024931401795e-06, "loss": 0.8743, "step": 3648 }, { "epoch": 0.12888926173015025, "grad_norm": 1.7868573665618896, "learning_rate": 9.74584491264489e-06, "loss": 0.9124, "step": 3649 }, { "epoch": 0.12892458353385816, "grad_norm": 1.9181185960769653, "learning_rate": 9.74566483177526e-06, "loss": 0.8502, "step": 3650 }, { "epoch": 0.12895990533756607, "grad_norm": 1.845571517944336, "learning_rate": 9.745484688795261e-06, "loss": 0.9351, "step": 3651 }, { "epoch": 0.12899522714127398, "grad_norm": 1.7715224027633667, "learning_rate": 9.74530448370725e-06, "loss": 0.9193, "step": 3652 }, { "epoch": 0.12903054894498187, "grad_norm": 1.7317529916763306, "learning_rate": 9.745124216513586e-06, "loss": 0.8544, "step": 3653 }, { "epoch": 0.12906587074868978, "grad_norm": 1.705918550491333, "learning_rate": 9.744943887216629e-06, "loss": 0.8752, "step": 3654 }, { "epoch": 0.1291011925523977, "grad_norm": 1.7071564197540283, "learning_rate": 9.744763495818737e-06, "loss": 0.9157, "step": 3655 }, { "epoch": 0.1291365143561056, "grad_norm": 1.7952768802642822, "learning_rate": 9.744583042322274e-06, "loss": 0.8693, "step": 3656 }, { "epoch": 0.1291718361598135, "grad_norm": 1.728308916091919, "learning_rate": 9.7444025267296e-06, "loss": 0.8436, "step": 3657 }, { "epoch": 0.12920715796352142, "grad_norm": 1.7373039722442627, "learning_rate": 9.744221949043076e-06, "loss": 0.9075, "step": 3658 }, { "epoch": 0.1292424797672293, "grad_norm": 1.6699910163879395, "learning_rate": 9.744041309265067e-06, "loss": 0.8792, "step": 3659 }, { "epoch": 0.1292778015709372, "grad_norm": 1.7030267715454102, "learning_rate": 9.743860607397939e-06, "loss": 0.8627, "step": 3660 }, { "epoch": 0.12931312337464512, "grad_norm": 1.7301933765411377, "learning_rate": 9.743679843444052e-06, "loss": 0.8822, "step": 3661 }, { "epoch": 0.12934844517835303, "grad_norm": 1.6342668533325195, "learning_rate": 9.743499017405778e-06, "loss": 0.8325, "step": 3662 }, { "epoch": 0.12938376698206094, "grad_norm": 1.7199504375457764, "learning_rate": 9.74331812928548e-06, "loss": 0.9151, "step": 3663 }, { "epoch": 0.12941908878576885, "grad_norm": 1.7521724700927734, "learning_rate": 9.743137179085524e-06, "loss": 0.8843, "step": 3664 }, { "epoch": 0.12945441058947676, "grad_norm": 7.874884128570557, "learning_rate": 9.742956166808281e-06, "loss": 0.9148, "step": 3665 }, { "epoch": 0.12948973239318465, "grad_norm": 1.786146879196167, "learning_rate": 9.74277509245612e-06, "loss": 0.8628, "step": 3666 }, { "epoch": 0.12952505419689256, "grad_norm": 1.7097569704055786, "learning_rate": 9.742593956031412e-06, "loss": 0.8536, "step": 3667 }, { "epoch": 0.12956037600060047, "grad_norm": 1.7504856586456299, "learning_rate": 9.742412757536523e-06, "loss": 0.9002, "step": 3668 }, { "epoch": 0.12959569780430838, "grad_norm": 2.0895724296569824, "learning_rate": 9.742231496973828e-06, "loss": 0.8599, "step": 3669 }, { "epoch": 0.1296310196080163, "grad_norm": 1.7340482473373413, "learning_rate": 9.742050174345699e-06, "loss": 0.8654, "step": 3670 }, { "epoch": 0.1296663414117242, "grad_norm": 1.6427830457687378, "learning_rate": 9.741868789654507e-06, "loss": 0.8973, "step": 3671 }, { "epoch": 0.12970166321543208, "grad_norm": 1.8856360912322998, "learning_rate": 9.74168734290263e-06, "loss": 0.8839, "step": 3672 }, { "epoch": 0.12973698501914, "grad_norm": 1.861379623413086, "learning_rate": 9.741505834092439e-06, "loss": 0.887, "step": 3673 }, { "epoch": 0.1297723068228479, "grad_norm": 1.7134127616882324, "learning_rate": 9.741324263226312e-06, "loss": 0.9436, "step": 3674 }, { "epoch": 0.12980762862655582, "grad_norm": 1.8655980825424194, "learning_rate": 9.741142630306624e-06, "loss": 0.8575, "step": 3675 }, { "epoch": 0.12984295043026373, "grad_norm": 1.7621161937713623, "learning_rate": 9.740960935335752e-06, "loss": 0.9, "step": 3676 }, { "epoch": 0.12987827223397164, "grad_norm": 1.8639425039291382, "learning_rate": 9.740779178316074e-06, "loss": 0.8772, "step": 3677 }, { "epoch": 0.12991359403767955, "grad_norm": 1.6052982807159424, "learning_rate": 9.740597359249971e-06, "loss": 0.8467, "step": 3678 }, { "epoch": 0.12994891584138743, "grad_norm": 1.5544414520263672, "learning_rate": 9.74041547813982e-06, "loss": 0.8622, "step": 3679 }, { "epoch": 0.12998423764509534, "grad_norm": 1.799560546875, "learning_rate": 9.740233534988002e-06, "loss": 0.8732, "step": 3680 }, { "epoch": 0.13001955944880325, "grad_norm": 1.6519840955734253, "learning_rate": 9.740051529796897e-06, "loss": 0.8919, "step": 3681 }, { "epoch": 0.13005488125251116, "grad_norm": 1.8433395624160767, "learning_rate": 9.739869462568892e-06, "loss": 0.8703, "step": 3682 }, { "epoch": 0.13009020305621907, "grad_norm": 1.7411212921142578, "learning_rate": 9.739687333306364e-06, "loss": 0.8672, "step": 3683 }, { "epoch": 0.13012552485992698, "grad_norm": 1.7351677417755127, "learning_rate": 9.7395051420117e-06, "loss": 0.8562, "step": 3684 }, { "epoch": 0.13016084666363487, "grad_norm": 1.7115254402160645, "learning_rate": 9.739322888687283e-06, "loss": 0.8547, "step": 3685 }, { "epoch": 0.13019616846734278, "grad_norm": 1.7238445281982422, "learning_rate": 9.7391405733355e-06, "loss": 0.851, "step": 3686 }, { "epoch": 0.1302314902710507, "grad_norm": 1.7979192733764648, "learning_rate": 9.738958195958736e-06, "loss": 0.8603, "step": 3687 }, { "epoch": 0.1302668120747586, "grad_norm": 1.6366047859191895, "learning_rate": 9.738775756559378e-06, "loss": 0.8459, "step": 3688 }, { "epoch": 0.1303021338784665, "grad_norm": 1.723311185836792, "learning_rate": 9.738593255139813e-06, "loss": 0.8756, "step": 3689 }, { "epoch": 0.13033745568217442, "grad_norm": 1.7205181121826172, "learning_rate": 9.73841069170243e-06, "loss": 0.8716, "step": 3690 }, { "epoch": 0.13037277748588233, "grad_norm": 1.6848593950271606, "learning_rate": 9.738228066249619e-06, "loss": 0.8965, "step": 3691 }, { "epoch": 0.1304080992895902, "grad_norm": 1.7478700876235962, "learning_rate": 9.73804537878377e-06, "loss": 0.9041, "step": 3692 }, { "epoch": 0.13044342109329812, "grad_norm": 1.7847139835357666, "learning_rate": 9.737862629307273e-06, "loss": 0.8683, "step": 3693 }, { "epoch": 0.13047874289700603, "grad_norm": 1.8245549201965332, "learning_rate": 9.737679817822522e-06, "loss": 0.8907, "step": 3694 }, { "epoch": 0.13051406470071394, "grad_norm": 1.9869439601898193, "learning_rate": 9.737496944331907e-06, "loss": 0.8904, "step": 3695 }, { "epoch": 0.13054938650442185, "grad_norm": 1.7221715450286865, "learning_rate": 9.737314008837822e-06, "loss": 0.8559, "step": 3696 }, { "epoch": 0.13058470830812977, "grad_norm": 1.8361729383468628, "learning_rate": 9.737131011342662e-06, "loss": 0.8851, "step": 3697 }, { "epoch": 0.13062003011183765, "grad_norm": 1.9764034748077393, "learning_rate": 9.736947951848823e-06, "loss": 0.8788, "step": 3698 }, { "epoch": 0.13065535191554556, "grad_norm": 1.879431128501892, "learning_rate": 9.7367648303587e-06, "loss": 0.8592, "step": 3699 }, { "epoch": 0.13069067371925347, "grad_norm": 1.9614619016647339, "learning_rate": 9.736581646874688e-06, "loss": 0.8967, "step": 3700 }, { "epoch": 0.13072599552296138, "grad_norm": 1.9402843713760376, "learning_rate": 9.736398401399185e-06, "loss": 0.8587, "step": 3701 }, { "epoch": 0.1307613173266693, "grad_norm": 1.8814395666122437, "learning_rate": 9.736215093934593e-06, "loss": 0.8703, "step": 3702 }, { "epoch": 0.1307966391303772, "grad_norm": 2.006608009338379, "learning_rate": 9.736031724483305e-06, "loss": 0.9065, "step": 3703 }, { "epoch": 0.1308319609340851, "grad_norm": 1.7368695735931396, "learning_rate": 9.735848293047727e-06, "loss": 0.8734, "step": 3704 }, { "epoch": 0.130867282737793, "grad_norm": 1.6635394096374512, "learning_rate": 9.735664799630253e-06, "loss": 0.8718, "step": 3705 }, { "epoch": 0.1309026045415009, "grad_norm": 1.6726199388504028, "learning_rate": 9.735481244233292e-06, "loss": 0.8684, "step": 3706 }, { "epoch": 0.13093792634520882, "grad_norm": 1.801328182220459, "learning_rate": 9.735297626859242e-06, "loss": 0.8582, "step": 3707 }, { "epoch": 0.13097324814891673, "grad_norm": 1.7122881412506104, "learning_rate": 9.735113947510504e-06, "loss": 0.8937, "step": 3708 }, { "epoch": 0.13100856995262464, "grad_norm": 1.9054243564605713, "learning_rate": 9.734930206189487e-06, "loss": 0.893, "step": 3709 }, { "epoch": 0.13104389175633255, "grad_norm": 1.7829458713531494, "learning_rate": 9.734746402898593e-06, "loss": 0.8617, "step": 3710 }, { "epoch": 0.13107921356004043, "grad_norm": 1.8848849534988403, "learning_rate": 9.734562537640229e-06, "loss": 0.9347, "step": 3711 }, { "epoch": 0.13111453536374834, "grad_norm": 1.6426241397857666, "learning_rate": 9.7343786104168e-06, "loss": 0.8719, "step": 3712 }, { "epoch": 0.13114985716745625, "grad_norm": 1.7911603450775146, "learning_rate": 9.734194621230712e-06, "loss": 0.8667, "step": 3713 }, { "epoch": 0.13118517897116416, "grad_norm": 2.0178470611572266, "learning_rate": 9.734010570084378e-06, "loss": 0.8806, "step": 3714 }, { "epoch": 0.13122050077487207, "grad_norm": 1.8750828504562378, "learning_rate": 9.733826456980202e-06, "loss": 0.8975, "step": 3715 }, { "epoch": 0.13125582257857998, "grad_norm": 1.9706525802612305, "learning_rate": 9.733642281920594e-06, "loss": 0.8764, "step": 3716 }, { "epoch": 0.1312911443822879, "grad_norm": 1.8189972639083862, "learning_rate": 9.733458044907967e-06, "loss": 0.8482, "step": 3717 }, { "epoch": 0.13132646618599578, "grad_norm": 1.7497371435165405, "learning_rate": 9.733273745944732e-06, "loss": 0.8469, "step": 3718 }, { "epoch": 0.1313617879897037, "grad_norm": 1.9400385618209839, "learning_rate": 9.733089385033297e-06, "loss": 0.8652, "step": 3719 }, { "epoch": 0.1313971097934116, "grad_norm": 2.1678860187530518, "learning_rate": 9.732904962176079e-06, "loss": 0.8955, "step": 3720 }, { "epoch": 0.1314324315971195, "grad_norm": 1.858949065208435, "learning_rate": 9.732720477375493e-06, "loss": 0.8796, "step": 3721 }, { "epoch": 0.13146775340082742, "grad_norm": 2.1129984855651855, "learning_rate": 9.732535930633947e-06, "loss": 0.8923, "step": 3722 }, { "epoch": 0.13150307520453533, "grad_norm": 2.9571661949157715, "learning_rate": 9.732351321953863e-06, "loss": 0.9056, "step": 3723 }, { "epoch": 0.1315383970082432, "grad_norm": 2.0102880001068115, "learning_rate": 9.732166651337654e-06, "loss": 0.8639, "step": 3724 }, { "epoch": 0.13157371881195112, "grad_norm": 1.952149510383606, "learning_rate": 9.731981918787736e-06, "loss": 0.8343, "step": 3725 }, { "epoch": 0.13160904061565903, "grad_norm": 1.6904075145721436, "learning_rate": 9.731797124306531e-06, "loss": 0.8905, "step": 3726 }, { "epoch": 0.13164436241936694, "grad_norm": 1.7420105934143066, "learning_rate": 9.731612267896453e-06, "loss": 0.8862, "step": 3727 }, { "epoch": 0.13167968422307486, "grad_norm": 1.910852074623108, "learning_rate": 9.731427349559923e-06, "loss": 0.9293, "step": 3728 }, { "epoch": 0.13171500602678277, "grad_norm": 1.9026261568069458, "learning_rate": 9.731242369299361e-06, "loss": 0.8832, "step": 3729 }, { "epoch": 0.13175032783049068, "grad_norm": 1.797067642211914, "learning_rate": 9.731057327117188e-06, "loss": 0.8682, "step": 3730 }, { "epoch": 0.13178564963419856, "grad_norm": 1.7109366655349731, "learning_rate": 9.730872223015829e-06, "loss": 0.8716, "step": 3731 }, { "epoch": 0.13182097143790647, "grad_norm": 1.7661750316619873, "learning_rate": 9.7306870569977e-06, "loss": 0.8875, "step": 3732 }, { "epoch": 0.13185629324161438, "grad_norm": 1.8177192211151123, "learning_rate": 9.73050182906523e-06, "loss": 0.86, "step": 3733 }, { "epoch": 0.1318916150453223, "grad_norm": 1.9132572412490845, "learning_rate": 9.730316539220842e-06, "loss": 0.886, "step": 3734 }, { "epoch": 0.1319269368490302, "grad_norm": 1.6958190202713013, "learning_rate": 9.730131187466958e-06, "loss": 0.8786, "step": 3735 }, { "epoch": 0.1319622586527381, "grad_norm": 1.8263463973999023, "learning_rate": 9.729945773806007e-06, "loss": 0.8894, "step": 3736 }, { "epoch": 0.131997580456446, "grad_norm": 1.7413697242736816, "learning_rate": 9.729760298240414e-06, "loss": 0.8736, "step": 3737 }, { "epoch": 0.1320329022601539, "grad_norm": 1.866529107093811, "learning_rate": 9.729574760772609e-06, "loss": 0.9133, "step": 3738 }, { "epoch": 0.13206822406386182, "grad_norm": 1.8661094903945923, "learning_rate": 9.729389161405018e-06, "loss": 0.8694, "step": 3739 }, { "epoch": 0.13210354586756973, "grad_norm": 1.6117786169052124, "learning_rate": 9.729203500140072e-06, "loss": 0.862, "step": 3740 }, { "epoch": 0.13213886767127764, "grad_norm": 1.5810633897781372, "learning_rate": 9.729017776980198e-06, "loss": 0.8746, "step": 3741 }, { "epoch": 0.13217418947498555, "grad_norm": 1.7777760028839111, "learning_rate": 9.728831991927827e-06, "loss": 0.8855, "step": 3742 }, { "epoch": 0.13220951127869346, "grad_norm": 1.9129095077514648, "learning_rate": 9.728646144985394e-06, "loss": 0.9148, "step": 3743 }, { "epoch": 0.13224483308240134, "grad_norm": 1.705339789390564, "learning_rate": 9.72846023615533e-06, "loss": 0.8944, "step": 3744 }, { "epoch": 0.13228015488610925, "grad_norm": 1.953466773033142, "learning_rate": 9.728274265440065e-06, "loss": 0.882, "step": 3745 }, { "epoch": 0.13231547668981716, "grad_norm": 1.715052843093872, "learning_rate": 9.728088232842037e-06, "loss": 0.8985, "step": 3746 }, { "epoch": 0.13235079849352507, "grad_norm": 1.7005646228790283, "learning_rate": 9.727902138363677e-06, "loss": 0.8726, "step": 3747 }, { "epoch": 0.13238612029723298, "grad_norm": 1.8852205276489258, "learning_rate": 9.727715982007425e-06, "loss": 0.8707, "step": 3748 }, { "epoch": 0.1324214421009409, "grad_norm": 2.103166341781616, "learning_rate": 9.727529763775711e-06, "loss": 0.8885, "step": 3749 }, { "epoch": 0.13245676390464878, "grad_norm": 1.90603768825531, "learning_rate": 9.72734348367098e-06, "loss": 0.8994, "step": 3750 }, { "epoch": 0.1324920857083567, "grad_norm": 1.6591883897781372, "learning_rate": 9.727157141695664e-06, "loss": 0.914, "step": 3751 }, { "epoch": 0.1325274075120646, "grad_norm": 1.6713131666183472, "learning_rate": 9.726970737852206e-06, "loss": 0.8808, "step": 3752 }, { "epoch": 0.1325627293157725, "grad_norm": 1.756262183189392, "learning_rate": 9.726784272143042e-06, "loss": 0.8899, "step": 3753 }, { "epoch": 0.13259805111948042, "grad_norm": 1.8648273944854736, "learning_rate": 9.726597744570614e-06, "loss": 0.8589, "step": 3754 }, { "epoch": 0.13263337292318833, "grad_norm": 1.7113438844680786, "learning_rate": 9.726411155137365e-06, "loss": 0.8764, "step": 3755 }, { "epoch": 0.13266869472689624, "grad_norm": 1.72063410282135, "learning_rate": 9.726224503845733e-06, "loss": 0.8684, "step": 3756 }, { "epoch": 0.13270401653060412, "grad_norm": 1.7227866649627686, "learning_rate": 9.726037790698164e-06, "loss": 0.892, "step": 3757 }, { "epoch": 0.13273933833431203, "grad_norm": 1.774308681488037, "learning_rate": 9.725851015697102e-06, "loss": 0.8927, "step": 3758 }, { "epoch": 0.13277466013801995, "grad_norm": 1.638424038887024, "learning_rate": 9.725664178844987e-06, "loss": 0.8396, "step": 3759 }, { "epoch": 0.13280998194172786, "grad_norm": 1.7626655101776123, "learning_rate": 9.72547728014427e-06, "loss": 0.8707, "step": 3760 }, { "epoch": 0.13284530374543577, "grad_norm": 1.8231065273284912, "learning_rate": 9.725290319597394e-06, "loss": 0.8599, "step": 3761 }, { "epoch": 0.13288062554914368, "grad_norm": 1.7800194025039673, "learning_rate": 9.725103297206806e-06, "loss": 0.9159, "step": 3762 }, { "epoch": 0.13291594735285156, "grad_norm": 1.7642297744750977, "learning_rate": 9.724916212974955e-06, "loss": 0.9042, "step": 3763 }, { "epoch": 0.13295126915655947, "grad_norm": 1.931039571762085, "learning_rate": 9.724729066904291e-06, "loss": 0.8946, "step": 3764 }, { "epoch": 0.13298659096026738, "grad_norm": 1.7676165103912354, "learning_rate": 9.724541858997259e-06, "loss": 0.8613, "step": 3765 }, { "epoch": 0.1330219127639753, "grad_norm": 1.8589180707931519, "learning_rate": 9.72435458925631e-06, "loss": 0.8702, "step": 3766 }, { "epoch": 0.1330572345676832, "grad_norm": 1.7693921327590942, "learning_rate": 9.724167257683898e-06, "loss": 0.8442, "step": 3767 }, { "epoch": 0.1330925563713911, "grad_norm": 2.26350474357605, "learning_rate": 9.723979864282471e-06, "loss": 0.8635, "step": 3768 }, { "epoch": 0.13312787817509902, "grad_norm": 1.6769297122955322, "learning_rate": 9.723792409054485e-06, "loss": 0.8753, "step": 3769 }, { "epoch": 0.1331631999788069, "grad_norm": 4.539920806884766, "learning_rate": 9.723604892002391e-06, "loss": 0.8355, "step": 3770 }, { "epoch": 0.13319852178251482, "grad_norm": 2.036283493041992, "learning_rate": 9.723417313128647e-06, "loss": 0.8891, "step": 3771 }, { "epoch": 0.13323384358622273, "grad_norm": 1.8829292058944702, "learning_rate": 9.723229672435704e-06, "loss": 0.8917, "step": 3772 }, { "epoch": 0.13326916538993064, "grad_norm": 1.8257734775543213, "learning_rate": 9.723041969926017e-06, "loss": 0.8782, "step": 3773 }, { "epoch": 0.13330448719363855, "grad_norm": 1.7461602687835693, "learning_rate": 9.722854205602046e-06, "loss": 0.8714, "step": 3774 }, { "epoch": 0.13333980899734646, "grad_norm": 2.142500638961792, "learning_rate": 9.722666379466245e-06, "loss": 0.9268, "step": 3775 }, { "epoch": 0.13337513080105434, "grad_norm": 1.696738839149475, "learning_rate": 9.722478491521077e-06, "loss": 0.8673, "step": 3776 }, { "epoch": 0.13341045260476225, "grad_norm": 1.932573676109314, "learning_rate": 9.722290541768998e-06, "loss": 0.8922, "step": 3777 }, { "epoch": 0.13344577440847016, "grad_norm": 1.772632122039795, "learning_rate": 9.722102530212466e-06, "loss": 0.8809, "step": 3778 }, { "epoch": 0.13348109621217807, "grad_norm": 1.8179963827133179, "learning_rate": 9.721914456853945e-06, "loss": 0.8668, "step": 3779 }, { "epoch": 0.13351641801588598, "grad_norm": 1.8477891683578491, "learning_rate": 9.721726321695895e-06, "loss": 0.8688, "step": 3780 }, { "epoch": 0.1335517398195939, "grad_norm": 2.022402048110962, "learning_rate": 9.721538124740778e-06, "loss": 0.9104, "step": 3781 }, { "epoch": 0.1335870616233018, "grad_norm": 1.6869722604751587, "learning_rate": 9.721349865991058e-06, "loss": 0.916, "step": 3782 }, { "epoch": 0.1336223834270097, "grad_norm": 1.7385759353637695, "learning_rate": 9.721161545449198e-06, "loss": 0.9151, "step": 3783 }, { "epoch": 0.1336577052307176, "grad_norm": 2.810310125350952, "learning_rate": 9.720973163117665e-06, "loss": 0.9086, "step": 3784 }, { "epoch": 0.1336930270344255, "grad_norm": 1.886878490447998, "learning_rate": 9.72078471899892e-06, "loss": 0.8964, "step": 3785 }, { "epoch": 0.13372834883813342, "grad_norm": 1.7662287950515747, "learning_rate": 9.720596213095433e-06, "loss": 0.8952, "step": 3786 }, { "epoch": 0.13376367064184133, "grad_norm": 1.819546103477478, "learning_rate": 9.720407645409672e-06, "loss": 0.9226, "step": 3787 }, { "epoch": 0.13379899244554924, "grad_norm": 1.802285075187683, "learning_rate": 9.720219015944102e-06, "loss": 0.9337, "step": 3788 }, { "epoch": 0.13383431424925712, "grad_norm": 1.7137832641601562, "learning_rate": 9.72003032470119e-06, "loss": 0.9183, "step": 3789 }, { "epoch": 0.13386963605296504, "grad_norm": 1.7465540170669556, "learning_rate": 9.719841571683412e-06, "loss": 0.9056, "step": 3790 }, { "epoch": 0.13390495785667295, "grad_norm": 1.7013126611709595, "learning_rate": 9.719652756893234e-06, "loss": 0.8673, "step": 3791 }, { "epoch": 0.13394027966038086, "grad_norm": 1.8006584644317627, "learning_rate": 9.719463880333127e-06, "loss": 0.8722, "step": 3792 }, { "epoch": 0.13397560146408877, "grad_norm": 1.8308115005493164, "learning_rate": 9.719274942005564e-06, "loss": 0.8899, "step": 3793 }, { "epoch": 0.13401092326779668, "grad_norm": 1.8586831092834473, "learning_rate": 9.719085941913018e-06, "loss": 0.9051, "step": 3794 }, { "epoch": 0.1340462450715046, "grad_norm": 1.5960116386413574, "learning_rate": 9.718896880057962e-06, "loss": 0.8682, "step": 3795 }, { "epoch": 0.13408156687521247, "grad_norm": 1.917148232460022, "learning_rate": 9.718707756442872e-06, "loss": 0.9414, "step": 3796 }, { "epoch": 0.13411688867892038, "grad_norm": 1.7459254264831543, "learning_rate": 9.71851857107022e-06, "loss": 0.8818, "step": 3797 }, { "epoch": 0.1341522104826283, "grad_norm": 1.7339351177215576, "learning_rate": 9.718329323942486e-06, "loss": 0.8823, "step": 3798 }, { "epoch": 0.1341875322863362, "grad_norm": 1.8359401226043701, "learning_rate": 9.718140015062144e-06, "loss": 0.8498, "step": 3799 }, { "epoch": 0.1342228540900441, "grad_norm": 2.0736093521118164, "learning_rate": 9.717950644431671e-06, "loss": 0.8621, "step": 3800 }, { "epoch": 0.13425817589375202, "grad_norm": 1.7665960788726807, "learning_rate": 9.717761212053547e-06, "loss": 0.878, "step": 3801 }, { "epoch": 0.1342934976974599, "grad_norm": 1.7353438138961792, "learning_rate": 9.717571717930253e-06, "loss": 0.8514, "step": 3802 }, { "epoch": 0.13432881950116782, "grad_norm": 2.0664682388305664, "learning_rate": 9.717382162064266e-06, "loss": 0.8735, "step": 3803 }, { "epoch": 0.13436414130487573, "grad_norm": 2.1188275814056396, "learning_rate": 9.717192544458068e-06, "loss": 0.8814, "step": 3804 }, { "epoch": 0.13439946310858364, "grad_norm": 1.8088122606277466, "learning_rate": 9.71700286511414e-06, "loss": 0.839, "step": 3805 }, { "epoch": 0.13443478491229155, "grad_norm": 1.6981347799301147, "learning_rate": 9.716813124034967e-06, "loss": 0.8888, "step": 3806 }, { "epoch": 0.13447010671599946, "grad_norm": 2.104419231414795, "learning_rate": 9.71662332122303e-06, "loss": 0.8846, "step": 3807 }, { "epoch": 0.13450542851970737, "grad_norm": 2.724405288696289, "learning_rate": 9.716433456680811e-06, "loss": 0.8645, "step": 3808 }, { "epoch": 0.13454075032341525, "grad_norm": 1.918140172958374, "learning_rate": 9.716243530410801e-06, "loss": 0.8958, "step": 3809 }, { "epoch": 0.13457607212712316, "grad_norm": 2.107703447341919, "learning_rate": 9.716053542415481e-06, "loss": 0.9003, "step": 3810 }, { "epoch": 0.13461139393083107, "grad_norm": 2.259474992752075, "learning_rate": 9.715863492697339e-06, "loss": 0.8665, "step": 3811 }, { "epoch": 0.13464671573453899, "grad_norm": 1.7449400424957275, "learning_rate": 9.71567338125886e-06, "loss": 0.8535, "step": 3812 }, { "epoch": 0.1346820375382469, "grad_norm": 2.644339084625244, "learning_rate": 9.715483208102538e-06, "loss": 0.8566, "step": 3813 }, { "epoch": 0.1347173593419548, "grad_norm": 1.8218673467636108, "learning_rate": 9.715292973230856e-06, "loss": 0.8678, "step": 3814 }, { "epoch": 0.1347526811456627, "grad_norm": 1.7966997623443604, "learning_rate": 9.715102676646307e-06, "loss": 0.8512, "step": 3815 }, { "epoch": 0.1347880029493706, "grad_norm": 1.9072356224060059, "learning_rate": 9.714912318351379e-06, "loss": 0.8413, "step": 3816 }, { "epoch": 0.1348233247530785, "grad_norm": 1.7888901233673096, "learning_rate": 9.714721898348564e-06, "loss": 0.8831, "step": 3817 }, { "epoch": 0.13485864655678642, "grad_norm": 1.7145335674285889, "learning_rate": 9.714531416640356e-06, "loss": 0.8709, "step": 3818 }, { "epoch": 0.13489396836049433, "grad_norm": 2.007749080657959, "learning_rate": 9.71434087322925e-06, "loss": 0.865, "step": 3819 }, { "epoch": 0.13492929016420224, "grad_norm": 1.7253327369689941, "learning_rate": 9.714150268117733e-06, "loss": 0.8848, "step": 3820 }, { "epoch": 0.13496461196791015, "grad_norm": 2.066927909851074, "learning_rate": 9.713959601308305e-06, "loss": 0.9026, "step": 3821 }, { "epoch": 0.13499993377161804, "grad_norm": 1.753029227256775, "learning_rate": 9.71376887280346e-06, "loss": 0.8838, "step": 3822 }, { "epoch": 0.13503525557532595, "grad_norm": 1.9162931442260742, "learning_rate": 9.713578082605695e-06, "loss": 0.8911, "step": 3823 }, { "epoch": 0.13507057737903386, "grad_norm": 1.9239459037780762, "learning_rate": 9.713387230717504e-06, "loss": 0.8773, "step": 3824 }, { "epoch": 0.13510589918274177, "grad_norm": 2.033997058868408, "learning_rate": 9.713196317141388e-06, "loss": 0.8657, "step": 3825 }, { "epoch": 0.13514122098644968, "grad_norm": 2.0093777179718018, "learning_rate": 9.713005341879843e-06, "loss": 0.8624, "step": 3826 }, { "epoch": 0.1351765427901576, "grad_norm": 1.8456084728240967, "learning_rate": 9.712814304935372e-06, "loss": 0.9021, "step": 3827 }, { "epoch": 0.1352118645938655, "grad_norm": 1.8445916175842285, "learning_rate": 9.712623206310474e-06, "loss": 0.8832, "step": 3828 }, { "epoch": 0.13524718639757338, "grad_norm": 1.837086796760559, "learning_rate": 9.712432046007648e-06, "loss": 0.884, "step": 3829 }, { "epoch": 0.1352825082012813, "grad_norm": 1.6817584037780762, "learning_rate": 9.712240824029397e-06, "loss": 0.8963, "step": 3830 }, { "epoch": 0.1353178300049892, "grad_norm": 9.051952362060547, "learning_rate": 9.712049540378225e-06, "loss": 0.8743, "step": 3831 }, { "epoch": 0.13535315180869711, "grad_norm": 1.8582707643508911, "learning_rate": 9.711858195056634e-06, "loss": 0.8972, "step": 3832 }, { "epoch": 0.13538847361240502, "grad_norm": 1.9743740558624268, "learning_rate": 9.711666788067127e-06, "loss": 0.8782, "step": 3833 }, { "epoch": 0.13542379541611294, "grad_norm": 2.0259921550750732, "learning_rate": 9.711475319412213e-06, "loss": 0.8288, "step": 3834 }, { "epoch": 0.13545911721982082, "grad_norm": 1.7625936269760132, "learning_rate": 9.711283789094394e-06, "loss": 0.8502, "step": 3835 }, { "epoch": 0.13549443902352873, "grad_norm": 1.8136706352233887, "learning_rate": 9.71109219711618e-06, "loss": 0.8843, "step": 3836 }, { "epoch": 0.13552976082723664, "grad_norm": 1.7985541820526123, "learning_rate": 9.710900543480075e-06, "loss": 0.8649, "step": 3837 }, { "epoch": 0.13556508263094455, "grad_norm": 1.9328961372375488, "learning_rate": 9.71070882818859e-06, "loss": 0.8821, "step": 3838 }, { "epoch": 0.13560040443465246, "grad_norm": 1.7884111404418945, "learning_rate": 9.710517051244234e-06, "loss": 0.841, "step": 3839 }, { "epoch": 0.13563572623836037, "grad_norm": 1.7444663047790527, "learning_rate": 9.710325212649516e-06, "loss": 0.9393, "step": 3840 }, { "epoch": 0.13567104804206828, "grad_norm": 2.0037760734558105, "learning_rate": 9.710133312406948e-06, "loss": 0.8928, "step": 3841 }, { "epoch": 0.13570636984577616, "grad_norm": 1.9184726476669312, "learning_rate": 9.70994135051904e-06, "loss": 0.8399, "step": 3842 }, { "epoch": 0.13574169164948408, "grad_norm": 1.7246031761169434, "learning_rate": 9.709749326988304e-06, "loss": 0.8641, "step": 3843 }, { "epoch": 0.13577701345319199, "grad_norm": 1.2925649881362915, "learning_rate": 9.709557241817256e-06, "loss": 0.6175, "step": 3844 }, { "epoch": 0.1358123352568999, "grad_norm": 1.9274402856826782, "learning_rate": 9.709365095008406e-06, "loss": 0.9068, "step": 3845 }, { "epoch": 0.1358476570606078, "grad_norm": 2.088137149810791, "learning_rate": 9.709172886564272e-06, "loss": 0.923, "step": 3846 }, { "epoch": 0.13588297886431572, "grad_norm": 2.034510374069214, "learning_rate": 9.708980616487369e-06, "loss": 0.8822, "step": 3847 }, { "epoch": 0.1359183006680236, "grad_norm": 2.0931336879730225, "learning_rate": 9.708788284780212e-06, "loss": 0.8859, "step": 3848 }, { "epoch": 0.1359536224717315, "grad_norm": 1.8127005100250244, "learning_rate": 9.70859589144532e-06, "loss": 0.8663, "step": 3849 }, { "epoch": 0.13598894427543942, "grad_norm": 1.6966270208358765, "learning_rate": 9.708403436485209e-06, "loss": 0.8198, "step": 3850 }, { "epoch": 0.13602426607914733, "grad_norm": 1.7891881465911865, "learning_rate": 9.7082109199024e-06, "loss": 0.8819, "step": 3851 }, { "epoch": 0.13605958788285524, "grad_norm": 1.636919379234314, "learning_rate": 9.70801834169941e-06, "loss": 0.8657, "step": 3852 }, { "epoch": 0.13609490968656315, "grad_norm": 1.7187772989273071, "learning_rate": 9.707825701878762e-06, "loss": 0.8522, "step": 3853 }, { "epoch": 0.13613023149027106, "grad_norm": 1.878676414489746, "learning_rate": 9.707633000442975e-06, "loss": 0.8527, "step": 3854 }, { "epoch": 0.13616555329397895, "grad_norm": 1.800923466682434, "learning_rate": 9.707440237394574e-06, "loss": 0.8704, "step": 3855 }, { "epoch": 0.13620087509768686, "grad_norm": 1.7173562049865723, "learning_rate": 9.707247412736078e-06, "loss": 0.8394, "step": 3856 }, { "epoch": 0.13623619690139477, "grad_norm": 1.6166058778762817, "learning_rate": 9.707054526470015e-06, "loss": 0.841, "step": 3857 }, { "epoch": 0.13627151870510268, "grad_norm": 1.6984983682632446, "learning_rate": 9.706861578598906e-06, "loss": 0.8975, "step": 3858 }, { "epoch": 0.1363068405088106, "grad_norm": 1.694858431816101, "learning_rate": 9.706668569125277e-06, "loss": 0.8546, "step": 3859 }, { "epoch": 0.1363421623125185, "grad_norm": 1.8753290176391602, "learning_rate": 9.706475498051655e-06, "loss": 0.8995, "step": 3860 }, { "epoch": 0.13637748411622638, "grad_norm": 1.7543004751205444, "learning_rate": 9.706282365380569e-06, "loss": 0.8408, "step": 3861 }, { "epoch": 0.1364128059199343, "grad_norm": 1.7208669185638428, "learning_rate": 9.706089171114542e-06, "loss": 0.8696, "step": 3862 }, { "epoch": 0.1364481277236422, "grad_norm": 1.8531419038772583, "learning_rate": 9.705895915256102e-06, "loss": 0.8986, "step": 3863 }, { "epoch": 0.13648344952735011, "grad_norm": 1.6545623540878296, "learning_rate": 9.705702597807783e-06, "loss": 0.862, "step": 3864 }, { "epoch": 0.13651877133105803, "grad_norm": 1.7425645589828491, "learning_rate": 9.705509218772114e-06, "loss": 0.8783, "step": 3865 }, { "epoch": 0.13655409313476594, "grad_norm": 1.7266587018966675, "learning_rate": 9.705315778151624e-06, "loss": 0.8956, "step": 3866 }, { "epoch": 0.13658941493847385, "grad_norm": 1.6895724534988403, "learning_rate": 9.705122275948846e-06, "loss": 0.8511, "step": 3867 }, { "epoch": 0.13662473674218173, "grad_norm": 1.9926321506500244, "learning_rate": 9.70492871216631e-06, "loss": 0.8793, "step": 3868 }, { "epoch": 0.13666005854588964, "grad_norm": 1.64768385887146, "learning_rate": 9.704735086806553e-06, "loss": 0.8664, "step": 3869 }, { "epoch": 0.13669538034959755, "grad_norm": 1.8997641801834106, "learning_rate": 9.70454139987211e-06, "loss": 0.8877, "step": 3870 }, { "epoch": 0.13673070215330546, "grad_norm": 1.7596871852874756, "learning_rate": 9.70434765136551e-06, "loss": 0.8506, "step": 3871 }, { "epoch": 0.13676602395701337, "grad_norm": 1.6937980651855469, "learning_rate": 9.704153841289294e-06, "loss": 0.8776, "step": 3872 }, { "epoch": 0.13680134576072128, "grad_norm": 1.7776455879211426, "learning_rate": 9.703959969645994e-06, "loss": 0.871, "step": 3873 }, { "epoch": 0.13683666756442917, "grad_norm": 1.667746663093567, "learning_rate": 9.703766036438153e-06, "loss": 0.8915, "step": 3874 }, { "epoch": 0.13687198936813708, "grad_norm": 1.5846377611160278, "learning_rate": 9.703572041668308e-06, "loss": 0.8605, "step": 3875 }, { "epoch": 0.136907311171845, "grad_norm": 1.6355358362197876, "learning_rate": 9.703377985338994e-06, "loss": 0.8495, "step": 3876 }, { "epoch": 0.1369426329755529, "grad_norm": 1.707934021949768, "learning_rate": 9.703183867452752e-06, "loss": 0.8638, "step": 3877 }, { "epoch": 0.1369779547792608, "grad_norm": 1.245072841644287, "learning_rate": 9.702989688012125e-06, "loss": 0.599, "step": 3878 }, { "epoch": 0.13701327658296872, "grad_norm": 2.003237247467041, "learning_rate": 9.702795447019653e-06, "loss": 0.8456, "step": 3879 }, { "epoch": 0.13704859838667663, "grad_norm": 2.0273385047912598, "learning_rate": 9.702601144477878e-06, "loss": 0.8583, "step": 3880 }, { "epoch": 0.1370839201903845, "grad_norm": 1.701995849609375, "learning_rate": 9.702406780389344e-06, "loss": 0.9107, "step": 3881 }, { "epoch": 0.13711924199409242, "grad_norm": 1.7904837131500244, "learning_rate": 9.702212354756593e-06, "loss": 0.8755, "step": 3882 }, { "epoch": 0.13715456379780033, "grad_norm": 1.6354830265045166, "learning_rate": 9.70201786758217e-06, "loss": 0.8725, "step": 3883 }, { "epoch": 0.13718988560150824, "grad_norm": 1.6141785383224487, "learning_rate": 9.701823318868621e-06, "loss": 0.8793, "step": 3884 }, { "epoch": 0.13722520740521615, "grad_norm": 1.7369811534881592, "learning_rate": 9.701628708618494e-06, "loss": 0.8742, "step": 3885 }, { "epoch": 0.13726052920892406, "grad_norm": 1.6562660932540894, "learning_rate": 9.701434036834334e-06, "loss": 0.842, "step": 3886 }, { "epoch": 0.13729585101263195, "grad_norm": 1.8563164472579956, "learning_rate": 9.701239303518688e-06, "loss": 0.8683, "step": 3887 }, { "epoch": 0.13733117281633986, "grad_norm": 1.761479377746582, "learning_rate": 9.701044508674106e-06, "loss": 0.8944, "step": 3888 }, { "epoch": 0.13736649462004777, "grad_norm": 1.7688201665878296, "learning_rate": 9.700849652303138e-06, "loss": 0.8783, "step": 3889 }, { "epoch": 0.13740181642375568, "grad_norm": 2.052988290786743, "learning_rate": 9.700654734408334e-06, "loss": 0.8502, "step": 3890 }, { "epoch": 0.1374371382274636, "grad_norm": 1.6989250183105469, "learning_rate": 9.700459754992242e-06, "loss": 0.8093, "step": 3891 }, { "epoch": 0.1374724600311715, "grad_norm": 1.6360344886779785, "learning_rate": 9.700264714057418e-06, "loss": 0.8525, "step": 3892 }, { "epoch": 0.1375077818348794, "grad_norm": 1.823107123374939, "learning_rate": 9.700069611606413e-06, "loss": 0.9016, "step": 3893 }, { "epoch": 0.1375431036385873, "grad_norm": 1.5741894245147705, "learning_rate": 9.69987444764178e-06, "loss": 0.8526, "step": 3894 }, { "epoch": 0.1375784254422952, "grad_norm": 1.827270746231079, "learning_rate": 9.699679222166073e-06, "loss": 0.9018, "step": 3895 }, { "epoch": 0.13761374724600312, "grad_norm": 1.7741940021514893, "learning_rate": 9.69948393518185e-06, "loss": 0.9009, "step": 3896 }, { "epoch": 0.13764906904971103, "grad_norm": 1.8523218631744385, "learning_rate": 9.699288586691661e-06, "loss": 0.8605, "step": 3897 }, { "epoch": 0.13768439085341894, "grad_norm": 1.705439805984497, "learning_rate": 9.69909317669807e-06, "loss": 0.8727, "step": 3898 }, { "epoch": 0.13771971265712685, "grad_norm": 1.8656060695648193, "learning_rate": 9.69889770520363e-06, "loss": 0.8961, "step": 3899 }, { "epoch": 0.13775503446083473, "grad_norm": 1.7891603708267212, "learning_rate": 9.698702172210898e-06, "loss": 0.8362, "step": 3900 }, { "epoch": 0.13779035626454264, "grad_norm": 2.062687397003174, "learning_rate": 9.698506577722437e-06, "loss": 0.9165, "step": 3901 }, { "epoch": 0.13782567806825055, "grad_norm": 1.1546156406402588, "learning_rate": 9.698310921740805e-06, "loss": 0.6357, "step": 3902 }, { "epoch": 0.13786099987195846, "grad_norm": 1.7397798299789429, "learning_rate": 9.698115204268564e-06, "loss": 0.8737, "step": 3903 }, { "epoch": 0.13789632167566637, "grad_norm": 1.8932515382766724, "learning_rate": 9.697919425308271e-06, "loss": 0.8722, "step": 3904 }, { "epoch": 0.13793164347937428, "grad_norm": 1.7774617671966553, "learning_rate": 9.697723584862496e-06, "loss": 0.8791, "step": 3905 }, { "epoch": 0.1379669652830822, "grad_norm": 1.7914636135101318, "learning_rate": 9.697527682933794e-06, "loss": 0.9057, "step": 3906 }, { "epoch": 0.13800228708679008, "grad_norm": 1.9078878164291382, "learning_rate": 9.697331719524733e-06, "loss": 0.8763, "step": 3907 }, { "epoch": 0.138037608890498, "grad_norm": 1.8707493543624878, "learning_rate": 9.697135694637881e-06, "loss": 0.9042, "step": 3908 }, { "epoch": 0.1380729306942059, "grad_norm": 1.8734843730926514, "learning_rate": 9.696939608275796e-06, "loss": 0.8921, "step": 3909 }, { "epoch": 0.1381082524979138, "grad_norm": 3.4908487796783447, "learning_rate": 9.696743460441051e-06, "loss": 0.8691, "step": 3910 }, { "epoch": 0.13814357430162172, "grad_norm": 1.9514774084091187, "learning_rate": 9.69654725113621e-06, "loss": 0.8661, "step": 3911 }, { "epoch": 0.13817889610532963, "grad_norm": 1.991989016532898, "learning_rate": 9.696350980363842e-06, "loss": 0.8659, "step": 3912 }, { "epoch": 0.1382142179090375, "grad_norm": 1.845996379852295, "learning_rate": 9.696154648126514e-06, "loss": 0.8948, "step": 3913 }, { "epoch": 0.13824953971274542, "grad_norm": 1.8217576742172241, "learning_rate": 9.695958254426797e-06, "loss": 0.8747, "step": 3914 }, { "epoch": 0.13828486151645333, "grad_norm": 1.6777029037475586, "learning_rate": 9.69576179926726e-06, "loss": 0.8626, "step": 3915 }, { "epoch": 0.13832018332016124, "grad_norm": 1.6992570161819458, "learning_rate": 9.695565282650476e-06, "loss": 0.8719, "step": 3916 }, { "epoch": 0.13835550512386915, "grad_norm": 2.00607967376709, "learning_rate": 9.695368704579017e-06, "loss": 0.9067, "step": 3917 }, { "epoch": 0.13839082692757707, "grad_norm": 2.0769762992858887, "learning_rate": 9.695172065055454e-06, "loss": 0.8712, "step": 3918 }, { "epoch": 0.13842614873128498, "grad_norm": 1.7243096828460693, "learning_rate": 9.694975364082362e-06, "loss": 0.8763, "step": 3919 }, { "epoch": 0.13846147053499286, "grad_norm": 1.8671237230300903, "learning_rate": 9.694778601662316e-06, "loss": 0.9022, "step": 3920 }, { "epoch": 0.13849679233870077, "grad_norm": 2.0269112586975098, "learning_rate": 9.694581777797889e-06, "loss": 0.9017, "step": 3921 }, { "epoch": 0.13853211414240868, "grad_norm": 2.3501036167144775, "learning_rate": 9.69438489249166e-06, "loss": 0.8521, "step": 3922 }, { "epoch": 0.1385674359461166, "grad_norm": 1.801894187927246, "learning_rate": 9.694187945746201e-06, "loss": 0.8496, "step": 3923 }, { "epoch": 0.1386027577498245, "grad_norm": 1.7889939546585083, "learning_rate": 9.693990937564095e-06, "loss": 0.8631, "step": 3924 }, { "epoch": 0.1386380795535324, "grad_norm": 1.6600844860076904, "learning_rate": 9.693793867947916e-06, "loss": 0.8515, "step": 3925 }, { "epoch": 0.1386734013572403, "grad_norm": 1.5989413261413574, "learning_rate": 9.693596736900247e-06, "loss": 0.8528, "step": 3926 }, { "epoch": 0.1387087231609482, "grad_norm": 1.7656793594360352, "learning_rate": 9.693399544423666e-06, "loss": 0.8654, "step": 3927 }, { "epoch": 0.13874404496465612, "grad_norm": 1.7513943910598755, "learning_rate": 9.693202290520752e-06, "loss": 0.8696, "step": 3928 }, { "epoch": 0.13877936676836403, "grad_norm": 1.257441759109497, "learning_rate": 9.693004975194092e-06, "loss": 0.593, "step": 3929 }, { "epoch": 0.13881468857207194, "grad_norm": 1.571678876876831, "learning_rate": 9.692807598446262e-06, "loss": 0.8671, "step": 3930 }, { "epoch": 0.13885001037577985, "grad_norm": 1.6933623552322388, "learning_rate": 9.69261016027985e-06, "loss": 0.8589, "step": 3931 }, { "epoch": 0.13888533217948776, "grad_norm": 1.815555453300476, "learning_rate": 9.692412660697437e-06, "loss": 0.8853, "step": 3932 }, { "epoch": 0.13892065398319564, "grad_norm": 1.7504421472549438, "learning_rate": 9.69221509970161e-06, "loss": 0.8813, "step": 3933 }, { "epoch": 0.13895597578690355, "grad_norm": 1.7174592018127441, "learning_rate": 9.692017477294954e-06, "loss": 0.8719, "step": 3934 }, { "epoch": 0.13899129759061146, "grad_norm": 1.6953465938568115, "learning_rate": 9.691819793480054e-06, "loss": 0.8564, "step": 3935 }, { "epoch": 0.13902661939431937, "grad_norm": 1.7674672603607178, "learning_rate": 9.691622048259501e-06, "loss": 0.8583, "step": 3936 }, { "epoch": 0.13906194119802728, "grad_norm": 1.8158938884735107, "learning_rate": 9.691424241635878e-06, "loss": 0.8535, "step": 3937 }, { "epoch": 0.1390972630017352, "grad_norm": 1.7281527519226074, "learning_rate": 9.69122637361178e-06, "loss": 0.8466, "step": 3938 }, { "epoch": 0.13913258480544308, "grad_norm": 1.9129291772842407, "learning_rate": 9.691028444189789e-06, "loss": 0.9227, "step": 3939 }, { "epoch": 0.139167906609151, "grad_norm": 2.141004800796509, "learning_rate": 9.690830453372502e-06, "loss": 0.8683, "step": 3940 }, { "epoch": 0.1392032284128589, "grad_norm": 1.7738921642303467, "learning_rate": 9.690632401162506e-06, "loss": 0.8452, "step": 3941 }, { "epoch": 0.1392385502165668, "grad_norm": 1.8158376216888428, "learning_rate": 9.690434287562396e-06, "loss": 0.8529, "step": 3942 }, { "epoch": 0.13927387202027472, "grad_norm": 1.778555154800415, "learning_rate": 9.690236112574764e-06, "loss": 0.8524, "step": 3943 }, { "epoch": 0.13930919382398263, "grad_norm": 1.7415529489517212, "learning_rate": 9.690037876202202e-06, "loss": 0.892, "step": 3944 }, { "epoch": 0.13934451562769054, "grad_norm": 1.695003628730774, "learning_rate": 9.689839578447309e-06, "loss": 0.8681, "step": 3945 }, { "epoch": 0.13937983743139842, "grad_norm": 1.9763895273208618, "learning_rate": 9.689641219312675e-06, "loss": 0.8982, "step": 3946 }, { "epoch": 0.13941515923510633, "grad_norm": 2.012002944946289, "learning_rate": 9.689442798800898e-06, "loss": 0.9126, "step": 3947 }, { "epoch": 0.13945048103881424, "grad_norm": 2.5056259632110596, "learning_rate": 9.689244316914576e-06, "loss": 0.8603, "step": 3948 }, { "epoch": 0.13948580284252216, "grad_norm": 1.541019320487976, "learning_rate": 9.689045773656306e-06, "loss": 0.8519, "step": 3949 }, { "epoch": 0.13952112464623007, "grad_norm": 1.7945469617843628, "learning_rate": 9.688847169028684e-06, "loss": 0.8967, "step": 3950 }, { "epoch": 0.13955644644993798, "grad_norm": 1.5620083808898926, "learning_rate": 9.688648503034314e-06, "loss": 0.8386, "step": 3951 }, { "epoch": 0.13959176825364586, "grad_norm": 1.8026249408721924, "learning_rate": 9.688449775675793e-06, "loss": 0.8413, "step": 3952 }, { "epoch": 0.13962709005735377, "grad_norm": 1.6911876201629639, "learning_rate": 9.688250986955722e-06, "loss": 0.8921, "step": 3953 }, { "epoch": 0.13966241186106168, "grad_norm": 1.849382996559143, "learning_rate": 9.688052136876706e-06, "loss": 0.8866, "step": 3954 }, { "epoch": 0.1396977336647696, "grad_norm": 1.8978533744812012, "learning_rate": 9.687853225441343e-06, "loss": 0.8959, "step": 3955 }, { "epoch": 0.1397330554684775, "grad_norm": 2.475173234939575, "learning_rate": 9.687654252652238e-06, "loss": 0.9099, "step": 3956 }, { "epoch": 0.1397683772721854, "grad_norm": 1.7246931791305542, "learning_rate": 9.687455218511997e-06, "loss": 0.8596, "step": 3957 }, { "epoch": 0.13980369907589332, "grad_norm": 1.730544090270996, "learning_rate": 9.68725612302322e-06, "loss": 0.8708, "step": 3958 }, { "epoch": 0.1398390208796012, "grad_norm": 2.272639274597168, "learning_rate": 9.68705696618852e-06, "loss": 0.8536, "step": 3959 }, { "epoch": 0.13987434268330912, "grad_norm": 2.067399501800537, "learning_rate": 9.686857748010496e-06, "loss": 0.8871, "step": 3960 }, { "epoch": 0.13990966448701703, "grad_norm": 1.6587181091308594, "learning_rate": 9.686658468491762e-06, "loss": 0.8537, "step": 3961 }, { "epoch": 0.13994498629072494, "grad_norm": 1.6946393251419067, "learning_rate": 9.68645912763492e-06, "loss": 0.8668, "step": 3962 }, { "epoch": 0.13998030809443285, "grad_norm": 1.8595958948135376, "learning_rate": 9.686259725442583e-06, "loss": 0.8682, "step": 3963 }, { "epoch": 0.14001562989814076, "grad_norm": 1.7635499238967896, "learning_rate": 9.686060261917359e-06, "loss": 0.8968, "step": 3964 }, { "epoch": 0.14005095170184864, "grad_norm": 1.93732488155365, "learning_rate": 9.68586073706186e-06, "loss": 0.8798, "step": 3965 }, { "epoch": 0.14008627350555655, "grad_norm": 1.7259496450424194, "learning_rate": 9.685661150878697e-06, "loss": 0.8921, "step": 3966 }, { "epoch": 0.14012159530926446, "grad_norm": 1.7977036237716675, "learning_rate": 9.685461503370482e-06, "loss": 0.8612, "step": 3967 }, { "epoch": 0.14015691711297237, "grad_norm": 1.66932213306427, "learning_rate": 9.685261794539827e-06, "loss": 0.8689, "step": 3968 }, { "epoch": 0.14019223891668028, "grad_norm": 1.7770591974258423, "learning_rate": 9.685062024389344e-06, "loss": 0.8484, "step": 3969 }, { "epoch": 0.1402275607203882, "grad_norm": 2.0216846466064453, "learning_rate": 9.684862192921654e-06, "loss": 0.8685, "step": 3970 }, { "epoch": 0.1402628825240961, "grad_norm": 1.738043189048767, "learning_rate": 9.684662300139366e-06, "loss": 0.8891, "step": 3971 }, { "epoch": 0.140298204327804, "grad_norm": 1.8410313129425049, "learning_rate": 9.684462346045098e-06, "loss": 0.8818, "step": 3972 }, { "epoch": 0.1403335261315119, "grad_norm": 1.6272852420806885, "learning_rate": 9.684262330641467e-06, "loss": 0.8915, "step": 3973 }, { "epoch": 0.1403688479352198, "grad_norm": 1.7899670600891113, "learning_rate": 9.684062253931093e-06, "loss": 0.8481, "step": 3974 }, { "epoch": 0.14040416973892772, "grad_norm": 1.7982332706451416, "learning_rate": 9.683862115916593e-06, "loss": 0.8532, "step": 3975 }, { "epoch": 0.14043949154263563, "grad_norm": 1.7770086526870728, "learning_rate": 9.683661916600583e-06, "loss": 0.8938, "step": 3976 }, { "epoch": 0.14047481334634354, "grad_norm": 1.9880586862564087, "learning_rate": 9.68346165598569e-06, "loss": 0.8639, "step": 3977 }, { "epoch": 0.14051013515005142, "grad_norm": 2.1057868003845215, "learning_rate": 9.68326133407453e-06, "loss": 0.8909, "step": 3978 }, { "epoch": 0.14054545695375933, "grad_norm": 1.6813465356826782, "learning_rate": 9.683060950869725e-06, "loss": 0.862, "step": 3979 }, { "epoch": 0.14058077875746725, "grad_norm": 1.6553955078125, "learning_rate": 9.6828605063739e-06, "loss": 0.8988, "step": 3980 }, { "epoch": 0.14061610056117516, "grad_norm": 1.8183456659317017, "learning_rate": 9.682660000589676e-06, "loss": 0.8554, "step": 3981 }, { "epoch": 0.14065142236488307, "grad_norm": 1.7191568613052368, "learning_rate": 9.682459433519677e-06, "loss": 0.842, "step": 3982 }, { "epoch": 0.14068674416859098, "grad_norm": 1.8171497583389282, "learning_rate": 9.682258805166532e-06, "loss": 0.9131, "step": 3983 }, { "epoch": 0.1407220659722989, "grad_norm": 1.6472980976104736, "learning_rate": 9.682058115532861e-06, "loss": 0.8775, "step": 3984 }, { "epoch": 0.14075738777600677, "grad_norm": 1.8615672588348389, "learning_rate": 9.681857364621296e-06, "loss": 0.9057, "step": 3985 }, { "epoch": 0.14079270957971468, "grad_norm": 1.9042744636535645, "learning_rate": 9.68165655243446e-06, "loss": 0.8594, "step": 3986 }, { "epoch": 0.1408280313834226, "grad_norm": 1.7762317657470703, "learning_rate": 9.681455678974984e-06, "loss": 0.8929, "step": 3987 }, { "epoch": 0.1408633531871305, "grad_norm": 1.658860445022583, "learning_rate": 9.681254744245497e-06, "loss": 0.8886, "step": 3988 }, { "epoch": 0.1408986749908384, "grad_norm": 1.7638442516326904, "learning_rate": 9.681053748248625e-06, "loss": 0.8734, "step": 3989 }, { "epoch": 0.14093399679454632, "grad_norm": 1.831918478012085, "learning_rate": 9.680852690987004e-06, "loss": 0.8872, "step": 3990 }, { "epoch": 0.1409693185982542, "grad_norm": 1.8700013160705566, "learning_rate": 9.680651572463261e-06, "loss": 0.8749, "step": 3991 }, { "epoch": 0.14100464040196212, "grad_norm": 1.6798917055130005, "learning_rate": 9.680450392680032e-06, "loss": 0.8494, "step": 3992 }, { "epoch": 0.14103996220567003, "grad_norm": 1.8232741355895996, "learning_rate": 9.68024915163995e-06, "loss": 0.9188, "step": 3993 }, { "epoch": 0.14107528400937794, "grad_norm": 1.664888858795166, "learning_rate": 9.680047849345643e-06, "loss": 0.8527, "step": 3994 }, { "epoch": 0.14111060581308585, "grad_norm": 1.7401950359344482, "learning_rate": 9.67984648579975e-06, "loss": 0.8619, "step": 3995 }, { "epoch": 0.14114592761679376, "grad_norm": 1.799322485923767, "learning_rate": 9.679645061004908e-06, "loss": 0.8984, "step": 3996 }, { "epoch": 0.14118124942050167, "grad_norm": 1.947717308998108, "learning_rate": 9.679443574963749e-06, "loss": 0.8711, "step": 3997 }, { "epoch": 0.14121657122420955, "grad_norm": 1.8071527481079102, "learning_rate": 9.679242027678913e-06, "loss": 0.8914, "step": 3998 }, { "epoch": 0.14125189302791746, "grad_norm": 1.9440762996673584, "learning_rate": 9.679040419153037e-06, "loss": 0.8923, "step": 3999 }, { "epoch": 0.14128721483162537, "grad_norm": 1.6751601696014404, "learning_rate": 9.67883874938876e-06, "loss": 0.8853, "step": 4000 }, { "epoch": 0.14132253663533328, "grad_norm": 1.755889654159546, "learning_rate": 9.678637018388721e-06, "loss": 0.8269, "step": 4001 }, { "epoch": 0.1413578584390412, "grad_norm": 1.7623775005340576, "learning_rate": 9.678435226155558e-06, "loss": 0.8559, "step": 4002 }, { "epoch": 0.1413931802427491, "grad_norm": 1.6162214279174805, "learning_rate": 9.678233372691917e-06, "loss": 0.8557, "step": 4003 }, { "epoch": 0.141428502046457, "grad_norm": 1.741684913635254, "learning_rate": 9.678031458000436e-06, "loss": 0.8432, "step": 4004 }, { "epoch": 0.1414638238501649, "grad_norm": 2.128878355026245, "learning_rate": 9.677829482083759e-06, "loss": 0.8911, "step": 4005 }, { "epoch": 0.1414991456538728, "grad_norm": 1.707867980003357, "learning_rate": 9.677627444944528e-06, "loss": 0.8751, "step": 4006 }, { "epoch": 0.14153446745758072, "grad_norm": 1.7146540880203247, "learning_rate": 9.677425346585389e-06, "loss": 0.873, "step": 4007 }, { "epoch": 0.14156978926128863, "grad_norm": 1.6200110912322998, "learning_rate": 9.677223187008987e-06, "loss": 0.869, "step": 4008 }, { "epoch": 0.14160511106499654, "grad_norm": 1.5510889291763306, "learning_rate": 9.677020966217966e-06, "loss": 0.8615, "step": 4009 }, { "epoch": 0.14164043286870445, "grad_norm": 1.6364567279815674, "learning_rate": 9.676818684214974e-06, "loss": 0.8655, "step": 4010 }, { "epoch": 0.14167575467241234, "grad_norm": 1.674279808998108, "learning_rate": 9.676616341002658e-06, "loss": 0.8821, "step": 4011 }, { "epoch": 0.14171107647612025, "grad_norm": 1.7733066082000732, "learning_rate": 9.676413936583667e-06, "loss": 0.8906, "step": 4012 }, { "epoch": 0.14174639827982816, "grad_norm": 1.9192421436309814, "learning_rate": 9.67621147096065e-06, "loss": 0.8699, "step": 4013 }, { "epoch": 0.14178172008353607, "grad_norm": 1.93905770778656, "learning_rate": 9.676008944136254e-06, "loss": 0.8924, "step": 4014 }, { "epoch": 0.14181704188724398, "grad_norm": 1.833863377571106, "learning_rate": 9.675806356113135e-06, "loss": 0.8728, "step": 4015 }, { "epoch": 0.1418523636909519, "grad_norm": 1.6272163391113281, "learning_rate": 9.675603706893939e-06, "loss": 0.8667, "step": 4016 }, { "epoch": 0.14188768549465977, "grad_norm": 1.8249770402908325, "learning_rate": 9.675400996481322e-06, "loss": 0.8359, "step": 4017 }, { "epoch": 0.14192300729836768, "grad_norm": 1.6356468200683594, "learning_rate": 9.675198224877935e-06, "loss": 0.8616, "step": 4018 }, { "epoch": 0.1419583291020756, "grad_norm": 1.7774810791015625, "learning_rate": 9.674995392086433e-06, "loss": 0.8648, "step": 4019 }, { "epoch": 0.1419936509057835, "grad_norm": 1.7764723300933838, "learning_rate": 9.674792498109469e-06, "loss": 0.8826, "step": 4020 }, { "epoch": 0.1420289727094914, "grad_norm": 1.7560721635818481, "learning_rate": 9.6745895429497e-06, "loss": 0.8936, "step": 4021 }, { "epoch": 0.14206429451319932, "grad_norm": 1.8911700248718262, "learning_rate": 9.674386526609782e-06, "loss": 0.8584, "step": 4022 }, { "epoch": 0.14209961631690723, "grad_norm": 1.6854170560836792, "learning_rate": 9.674183449092372e-06, "loss": 0.8644, "step": 4023 }, { "epoch": 0.14213493812061512, "grad_norm": 1.989015817642212, "learning_rate": 9.673980310400129e-06, "loss": 0.8929, "step": 4024 }, { "epoch": 0.14217025992432303, "grad_norm": 1.9921343326568604, "learning_rate": 9.673777110535708e-06, "loss": 0.8668, "step": 4025 }, { "epoch": 0.14220558172803094, "grad_norm": 1.7245731353759766, "learning_rate": 9.673573849501772e-06, "loss": 0.8489, "step": 4026 }, { "epoch": 0.14224090353173885, "grad_norm": 1.7450921535491943, "learning_rate": 9.673370527300979e-06, "loss": 0.8658, "step": 4027 }, { "epoch": 0.14227622533544676, "grad_norm": 1.5895118713378906, "learning_rate": 9.673167143935992e-06, "loss": 0.8517, "step": 4028 }, { "epoch": 0.14231154713915467, "grad_norm": 1.8494991064071655, "learning_rate": 9.672963699409473e-06, "loss": 0.9057, "step": 4029 }, { "epoch": 0.14234686894286258, "grad_norm": 1.7925485372543335, "learning_rate": 9.67276019372408e-06, "loss": 0.8412, "step": 4030 }, { "epoch": 0.14238219074657046, "grad_norm": 1.6568171977996826, "learning_rate": 9.672556626882483e-06, "loss": 0.8392, "step": 4031 }, { "epoch": 0.14241751255027837, "grad_norm": 1.8225566148757935, "learning_rate": 9.672352998887341e-06, "loss": 0.8836, "step": 4032 }, { "epoch": 0.14245283435398629, "grad_norm": 1.759358286857605, "learning_rate": 9.672149309741323e-06, "loss": 0.8578, "step": 4033 }, { "epoch": 0.1424881561576942, "grad_norm": 1.772778868675232, "learning_rate": 9.671945559447094e-06, "loss": 0.8553, "step": 4034 }, { "epoch": 0.1425234779614021, "grad_norm": 1.6904714107513428, "learning_rate": 9.671741748007318e-06, "loss": 0.8867, "step": 4035 }, { "epoch": 0.14255879976511002, "grad_norm": 1.6814792156219482, "learning_rate": 9.671537875424663e-06, "loss": 0.8516, "step": 4036 }, { "epoch": 0.1425941215688179, "grad_norm": 1.8079363107681274, "learning_rate": 9.671333941701802e-06, "loss": 0.8858, "step": 4037 }, { "epoch": 0.1426294433725258, "grad_norm": 1.926926612854004, "learning_rate": 9.671129946841398e-06, "loss": 0.8658, "step": 4038 }, { "epoch": 0.14266476517623372, "grad_norm": 1.80964195728302, "learning_rate": 9.670925890846124e-06, "loss": 0.8514, "step": 4039 }, { "epoch": 0.14270008697994163, "grad_norm": 1.9053958654403687, "learning_rate": 9.670721773718649e-06, "loss": 0.862, "step": 4040 }, { "epoch": 0.14273540878364954, "grad_norm": 1.603742241859436, "learning_rate": 9.670517595461646e-06, "loss": 0.8523, "step": 4041 }, { "epoch": 0.14277073058735745, "grad_norm": 1.6982505321502686, "learning_rate": 9.670313356077787e-06, "loss": 0.8832, "step": 4042 }, { "epoch": 0.14280605239106536, "grad_norm": 1.7642985582351685, "learning_rate": 9.670109055569744e-06, "loss": 0.8823, "step": 4043 }, { "epoch": 0.14284137419477325, "grad_norm": 1.9410650730133057, "learning_rate": 9.669904693940193e-06, "loss": 0.8689, "step": 4044 }, { "epoch": 0.14287669599848116, "grad_norm": 1.8394591808319092, "learning_rate": 9.669700271191806e-06, "loss": 0.87, "step": 4045 }, { "epoch": 0.14291201780218907, "grad_norm": 1.6357799768447876, "learning_rate": 9.66949578732726e-06, "loss": 0.8528, "step": 4046 }, { "epoch": 0.14294733960589698, "grad_norm": 1.779191493988037, "learning_rate": 9.669291242349231e-06, "loss": 0.847, "step": 4047 }, { "epoch": 0.1429826614096049, "grad_norm": 1.7749407291412354, "learning_rate": 9.669086636260396e-06, "loss": 0.8872, "step": 4048 }, { "epoch": 0.1430179832133128, "grad_norm": 1.6302088499069214, "learning_rate": 9.668881969063433e-06, "loss": 0.8525, "step": 4049 }, { "epoch": 0.14305330501702068, "grad_norm": 1.7309083938598633, "learning_rate": 9.66867724076102e-06, "loss": 0.8784, "step": 4050 }, { "epoch": 0.1430886268207286, "grad_norm": 1.8891485929489136, "learning_rate": 9.668472451355837e-06, "loss": 0.8875, "step": 4051 }, { "epoch": 0.1431239486244365, "grad_norm": 1.665246844291687, "learning_rate": 9.668267600850563e-06, "loss": 0.844, "step": 4052 }, { "epoch": 0.14315927042814441, "grad_norm": 3.0178143978118896, "learning_rate": 9.668062689247881e-06, "loss": 0.8717, "step": 4053 }, { "epoch": 0.14319459223185232, "grad_norm": 1.796778678894043, "learning_rate": 9.667857716550473e-06, "loss": 0.8816, "step": 4054 }, { "epoch": 0.14322991403556024, "grad_norm": 1.7509256601333618, "learning_rate": 9.66765268276102e-06, "loss": 0.8561, "step": 4055 }, { "epoch": 0.14326523583926815, "grad_norm": 1.8902106285095215, "learning_rate": 9.667447587882206e-06, "loss": 0.8732, "step": 4056 }, { "epoch": 0.14330055764297603, "grad_norm": 1.6339412927627563, "learning_rate": 9.667242431916716e-06, "loss": 0.8516, "step": 4057 }, { "epoch": 0.14333587944668394, "grad_norm": 1.7395321130752563, "learning_rate": 9.667037214867234e-06, "loss": 0.878, "step": 4058 }, { "epoch": 0.14337120125039185, "grad_norm": 2.0378129482269287, "learning_rate": 9.666831936736445e-06, "loss": 0.8953, "step": 4059 }, { "epoch": 0.14340652305409976, "grad_norm": 1.7485110759735107, "learning_rate": 9.666626597527039e-06, "loss": 0.8651, "step": 4060 }, { "epoch": 0.14344184485780767, "grad_norm": 1.6470073461532593, "learning_rate": 9.6664211972417e-06, "loss": 0.891, "step": 4061 }, { "epoch": 0.14347716666151558, "grad_norm": 1.8202098608016968, "learning_rate": 9.666215735883117e-06, "loss": 0.8499, "step": 4062 }, { "epoch": 0.14351248846522346, "grad_norm": 1.7870453596115112, "learning_rate": 9.66601021345398e-06, "loss": 0.8709, "step": 4063 }, { "epoch": 0.14354781026893138, "grad_norm": 1.8017405271530151, "learning_rate": 9.66580462995698e-06, "loss": 0.8388, "step": 4064 }, { "epoch": 0.14358313207263929, "grad_norm": 1.8120266199111938, "learning_rate": 9.665598985394806e-06, "loss": 0.8601, "step": 4065 }, { "epoch": 0.1436184538763472, "grad_norm": 1.9371742010116577, "learning_rate": 9.665393279770147e-06, "loss": 0.8505, "step": 4066 }, { "epoch": 0.1436537756800551, "grad_norm": 1.6784588098526, "learning_rate": 9.6651875130857e-06, "loss": 0.8611, "step": 4067 }, { "epoch": 0.14368909748376302, "grad_norm": 1.8589072227478027, "learning_rate": 9.664981685344154e-06, "loss": 0.8623, "step": 4068 }, { "epoch": 0.14372441928747093, "grad_norm": 1.8230996131896973, "learning_rate": 9.664775796548207e-06, "loss": 0.8612, "step": 4069 }, { "epoch": 0.1437597410911788, "grad_norm": 1.7382134199142456, "learning_rate": 9.66456984670055e-06, "loss": 0.8451, "step": 4070 }, { "epoch": 0.14379506289488672, "grad_norm": 2.1141200065612793, "learning_rate": 9.66436383580388e-06, "loss": 0.8759, "step": 4071 }, { "epoch": 0.14383038469859463, "grad_norm": 4.238941669464111, "learning_rate": 9.664157763860892e-06, "loss": 0.8622, "step": 4072 }, { "epoch": 0.14386570650230254, "grad_norm": 2.0133962631225586, "learning_rate": 9.663951630874285e-06, "loss": 0.8667, "step": 4073 }, { "epoch": 0.14390102830601045, "grad_norm": 1.8264963626861572, "learning_rate": 9.663745436846755e-06, "loss": 0.8347, "step": 4074 }, { "epoch": 0.14393635010971836, "grad_norm": 1.6430718898773193, "learning_rate": 9.663539181781e-06, "loss": 0.8744, "step": 4075 }, { "epoch": 0.14397167191342625, "grad_norm": 1.693913459777832, "learning_rate": 9.663332865679724e-06, "loss": 0.8648, "step": 4076 }, { "epoch": 0.14400699371713416, "grad_norm": 1.7406926155090332, "learning_rate": 9.663126488545622e-06, "loss": 0.9076, "step": 4077 }, { "epoch": 0.14404231552084207, "grad_norm": 1.6116955280303955, "learning_rate": 9.662920050381398e-06, "loss": 0.8681, "step": 4078 }, { "epoch": 0.14407763732454998, "grad_norm": 1.6347270011901855, "learning_rate": 9.662713551189754e-06, "loss": 0.9066, "step": 4079 }, { "epoch": 0.1441129591282579, "grad_norm": 1.737933874130249, "learning_rate": 9.662506990973389e-06, "loss": 0.8639, "step": 4080 }, { "epoch": 0.1441482809319658, "grad_norm": 1.7704216241836548, "learning_rate": 9.66230036973501e-06, "loss": 0.8955, "step": 4081 }, { "epoch": 0.1441836027356737, "grad_norm": 2.0693886280059814, "learning_rate": 9.662093687477322e-06, "loss": 0.8958, "step": 4082 }, { "epoch": 0.1442189245393816, "grad_norm": 1.8376734256744385, "learning_rate": 9.661886944203025e-06, "loss": 0.8811, "step": 4083 }, { "epoch": 0.1442542463430895, "grad_norm": 1.6470123529434204, "learning_rate": 9.661680139914828e-06, "loss": 0.8691, "step": 4084 }, { "epoch": 0.14428956814679741, "grad_norm": 1.9788230657577515, "learning_rate": 9.66147327461544e-06, "loss": 0.8721, "step": 4085 }, { "epoch": 0.14432488995050533, "grad_norm": 1.6913570165634155, "learning_rate": 9.661266348307565e-06, "loss": 0.8753, "step": 4086 }, { "epoch": 0.14436021175421324, "grad_norm": 1.7505359649658203, "learning_rate": 9.661059360993912e-06, "loss": 0.8585, "step": 4087 }, { "epoch": 0.14439553355792115, "grad_norm": 1.62080717086792, "learning_rate": 9.660852312677191e-06, "loss": 0.8894, "step": 4088 }, { "epoch": 0.14443085536162903, "grad_norm": 1.7026095390319824, "learning_rate": 9.66064520336011e-06, "loss": 0.8628, "step": 4089 }, { "epoch": 0.14446617716533694, "grad_norm": 1.769762635231018, "learning_rate": 9.660438033045383e-06, "loss": 0.8931, "step": 4090 }, { "epoch": 0.14450149896904485, "grad_norm": 1.818688154220581, "learning_rate": 9.660230801735717e-06, "loss": 0.8728, "step": 4091 }, { "epoch": 0.14453682077275276, "grad_norm": 1.8727500438690186, "learning_rate": 9.660023509433826e-06, "loss": 0.8749, "step": 4092 }, { "epoch": 0.14457214257646067, "grad_norm": 2.1147189140319824, "learning_rate": 9.659816156142425e-06, "loss": 0.8498, "step": 4093 }, { "epoch": 0.14460746438016858, "grad_norm": 1.6118640899658203, "learning_rate": 9.659608741864225e-06, "loss": 0.8741, "step": 4094 }, { "epoch": 0.1446427861838765, "grad_norm": 1.8618355989456177, "learning_rate": 9.659401266601942e-06, "loss": 0.8566, "step": 4095 }, { "epoch": 0.14467810798758438, "grad_norm": 2.2340402603149414, "learning_rate": 9.659193730358292e-06, "loss": 0.8964, "step": 4096 }, { "epoch": 0.1447134297912923, "grad_norm": 1.833338975906372, "learning_rate": 9.658986133135988e-06, "loss": 0.8982, "step": 4097 }, { "epoch": 0.1447487515950002, "grad_norm": 1.707353949546814, "learning_rate": 9.658778474937751e-06, "loss": 0.8688, "step": 4098 }, { "epoch": 0.1447840733987081, "grad_norm": 1.74473237991333, "learning_rate": 9.658570755766296e-06, "loss": 0.8853, "step": 4099 }, { "epoch": 0.14481939520241602, "grad_norm": 1.8816165924072266, "learning_rate": 9.658362975624344e-06, "loss": 0.8941, "step": 4100 }, { "epoch": 0.14485471700612393, "grad_norm": 1.6634348630905151, "learning_rate": 9.658155134514613e-06, "loss": 0.8838, "step": 4101 }, { "epoch": 0.1448900388098318, "grad_norm": 1.7172564268112183, "learning_rate": 9.657947232439822e-06, "loss": 0.893, "step": 4102 }, { "epoch": 0.14492536061353972, "grad_norm": 1.655889868736267, "learning_rate": 9.657739269402692e-06, "loss": 0.8655, "step": 4103 }, { "epoch": 0.14496068241724763, "grad_norm": 1.791750192642212, "learning_rate": 9.657531245405948e-06, "loss": 0.8633, "step": 4104 }, { "epoch": 0.14499600422095554, "grad_norm": 2.018986463546753, "learning_rate": 9.657323160452312e-06, "loss": 0.8916, "step": 4105 }, { "epoch": 0.14503132602466345, "grad_norm": 1.895735740661621, "learning_rate": 9.657115014544505e-06, "loss": 0.8487, "step": 4106 }, { "epoch": 0.14506664782837136, "grad_norm": 1.8984934091567993, "learning_rate": 9.656906807685252e-06, "loss": 0.831, "step": 4107 }, { "epoch": 0.14510196963207928, "grad_norm": 1.8729451894760132, "learning_rate": 9.656698539877276e-06, "loss": 0.8883, "step": 4108 }, { "epoch": 0.14513729143578716, "grad_norm": 1.8626339435577393, "learning_rate": 9.656490211123307e-06, "loss": 0.8699, "step": 4109 }, { "epoch": 0.14517261323949507, "grad_norm": 1.6696969270706177, "learning_rate": 9.656281821426068e-06, "loss": 0.8141, "step": 4110 }, { "epoch": 0.14520793504320298, "grad_norm": 1.9062057733535767, "learning_rate": 9.65607337078829e-06, "loss": 0.8587, "step": 4111 }, { "epoch": 0.1452432568469109, "grad_norm": 1.6317791938781738, "learning_rate": 9.655864859212697e-06, "loss": 0.8573, "step": 4112 }, { "epoch": 0.1452785786506188, "grad_norm": 1.6492111682891846, "learning_rate": 9.655656286702019e-06, "loss": 0.8376, "step": 4113 }, { "epoch": 0.1453139004543267, "grad_norm": 1.8233356475830078, "learning_rate": 9.65544765325899e-06, "loss": 0.8609, "step": 4114 }, { "epoch": 0.1453492222580346, "grad_norm": 1.745963454246521, "learning_rate": 9.655238958886336e-06, "loss": 0.8783, "step": 4115 }, { "epoch": 0.1453845440617425, "grad_norm": 1.8127765655517578, "learning_rate": 9.655030203586788e-06, "loss": 0.901, "step": 4116 }, { "epoch": 0.14541986586545042, "grad_norm": 1.7186375856399536, "learning_rate": 9.654821387363082e-06, "loss": 0.8742, "step": 4117 }, { "epoch": 0.14545518766915833, "grad_norm": 1.648463487625122, "learning_rate": 9.654612510217946e-06, "loss": 0.8556, "step": 4118 }, { "epoch": 0.14549050947286624, "grad_norm": 1.8351949453353882, "learning_rate": 9.654403572154119e-06, "loss": 0.8755, "step": 4119 }, { "epoch": 0.14552583127657415, "grad_norm": 1.8522990942001343, "learning_rate": 9.65419457317433e-06, "loss": 0.8977, "step": 4120 }, { "epoch": 0.14556115308028206, "grad_norm": 1.8559231758117676, "learning_rate": 9.65398551328132e-06, "loss": 0.866, "step": 4121 }, { "epoch": 0.14559647488398994, "grad_norm": 1.8494497537612915, "learning_rate": 9.653776392477822e-06, "loss": 0.8609, "step": 4122 }, { "epoch": 0.14563179668769785, "grad_norm": 1.914613962173462, "learning_rate": 9.653567210766575e-06, "loss": 0.8924, "step": 4123 }, { "epoch": 0.14566711849140576, "grad_norm": 1.7507041692733765, "learning_rate": 9.653357968150313e-06, "loss": 0.8674, "step": 4124 }, { "epoch": 0.14570244029511367, "grad_norm": 1.850701928138733, "learning_rate": 9.653148664631777e-06, "loss": 0.9053, "step": 4125 }, { "epoch": 0.14573776209882158, "grad_norm": 1.6566457748413086, "learning_rate": 9.652939300213705e-06, "loss": 0.8713, "step": 4126 }, { "epoch": 0.1457730839025295, "grad_norm": 1.746166706085205, "learning_rate": 9.652729874898838e-06, "loss": 0.8788, "step": 4127 }, { "epoch": 0.14580840570623738, "grad_norm": 1.866909146308899, "learning_rate": 9.652520388689919e-06, "loss": 0.8963, "step": 4128 }, { "epoch": 0.1458437275099453, "grad_norm": 1.7126121520996094, "learning_rate": 9.652310841589686e-06, "loss": 0.8717, "step": 4129 }, { "epoch": 0.1458790493136532, "grad_norm": 1.099441647529602, "learning_rate": 9.652101233600885e-06, "loss": 0.6039, "step": 4130 }, { "epoch": 0.1459143711173611, "grad_norm": 1.909324049949646, "learning_rate": 9.651891564726256e-06, "loss": 0.8869, "step": 4131 }, { "epoch": 0.14594969292106902, "grad_norm": 1.894183874130249, "learning_rate": 9.651681834968544e-06, "loss": 0.8461, "step": 4132 }, { "epoch": 0.14598501472477693, "grad_norm": 1.9042707681655884, "learning_rate": 9.651472044330497e-06, "loss": 0.8643, "step": 4133 }, { "epoch": 0.14602033652848484, "grad_norm": 2.007189989089966, "learning_rate": 9.651262192814858e-06, "loss": 0.8709, "step": 4134 }, { "epoch": 0.14605565833219272, "grad_norm": 1.8535808324813843, "learning_rate": 9.651052280424372e-06, "loss": 0.9104, "step": 4135 }, { "epoch": 0.14609098013590063, "grad_norm": 2.297652006149292, "learning_rate": 9.65084230716179e-06, "loss": 0.8778, "step": 4136 }, { "epoch": 0.14612630193960854, "grad_norm": 1.9006649255752563, "learning_rate": 9.650632273029857e-06, "loss": 0.9006, "step": 4137 }, { "epoch": 0.14616162374331645, "grad_norm": 1.811360239982605, "learning_rate": 9.650422178031323e-06, "loss": 0.9043, "step": 4138 }, { "epoch": 0.14619694554702437, "grad_norm": 2.0185372829437256, "learning_rate": 9.650212022168937e-06, "loss": 0.847, "step": 4139 }, { "epoch": 0.14623226735073228, "grad_norm": 2.343454122543335, "learning_rate": 9.65000180544545e-06, "loss": 0.8812, "step": 4140 }, { "epoch": 0.14626758915444016, "grad_norm": 1.765956997871399, "learning_rate": 9.649791527863615e-06, "loss": 0.9053, "step": 4141 }, { "epoch": 0.14630291095814807, "grad_norm": 2.5971970558166504, "learning_rate": 9.649581189426182e-06, "loss": 0.8673, "step": 4142 }, { "epoch": 0.14633823276185598, "grad_norm": 1.849094271659851, "learning_rate": 9.649370790135905e-06, "loss": 0.8802, "step": 4143 }, { "epoch": 0.1463735545655639, "grad_norm": 1.9458736181259155, "learning_rate": 9.649160329995537e-06, "loss": 0.8871, "step": 4144 }, { "epoch": 0.1464088763692718, "grad_norm": 1.8794392347335815, "learning_rate": 9.64894980900783e-06, "loss": 0.9032, "step": 4145 }, { "epoch": 0.1464441981729797, "grad_norm": 1.8276742696762085, "learning_rate": 9.648739227175546e-06, "loss": 0.9014, "step": 4146 }, { "epoch": 0.14647951997668762, "grad_norm": 1.820945382118225, "learning_rate": 9.648528584501434e-06, "loss": 0.8683, "step": 4147 }, { "epoch": 0.1465148417803955, "grad_norm": 1.9112756252288818, "learning_rate": 9.648317880988254e-06, "loss": 0.8742, "step": 4148 }, { "epoch": 0.14655016358410342, "grad_norm": 2.12894606590271, "learning_rate": 9.648107116638764e-06, "loss": 0.8502, "step": 4149 }, { "epoch": 0.14658548538781133, "grad_norm": 1.7855291366577148, "learning_rate": 9.647896291455721e-06, "loss": 0.8893, "step": 4150 }, { "epoch": 0.14662080719151924, "grad_norm": 1.8659420013427734, "learning_rate": 9.647685405441886e-06, "loss": 0.8531, "step": 4151 }, { "epoch": 0.14665612899522715, "grad_norm": 1.8668832778930664, "learning_rate": 9.647474458600017e-06, "loss": 0.8306, "step": 4152 }, { "epoch": 0.14669145079893506, "grad_norm": 1.859171986579895, "learning_rate": 9.647263450932877e-06, "loss": 0.8791, "step": 4153 }, { "epoch": 0.14672677260264294, "grad_norm": 2.0241284370422363, "learning_rate": 9.647052382443226e-06, "loss": 0.8826, "step": 4154 }, { "epoch": 0.14676209440635085, "grad_norm": 1.80441415309906, "learning_rate": 9.646841253133827e-06, "loss": 0.8188, "step": 4155 }, { "epoch": 0.14679741621005876, "grad_norm": 2.1253771781921387, "learning_rate": 9.646630063007442e-06, "loss": 0.9013, "step": 4156 }, { "epoch": 0.14683273801376667, "grad_norm": 2.0384154319763184, "learning_rate": 9.646418812066838e-06, "loss": 0.874, "step": 4157 }, { "epoch": 0.14686805981747458, "grad_norm": 1.916076421737671, "learning_rate": 9.64620750031478e-06, "loss": 0.9212, "step": 4158 }, { "epoch": 0.1469033816211825, "grad_norm": 1.950907826423645, "learning_rate": 9.645996127754027e-06, "loss": 0.8387, "step": 4159 }, { "epoch": 0.1469387034248904, "grad_norm": 1.7798360586166382, "learning_rate": 9.645784694387352e-06, "loss": 0.874, "step": 4160 }, { "epoch": 0.1469740252285983, "grad_norm": 1.8832080364227295, "learning_rate": 9.645573200217522e-06, "loss": 0.9067, "step": 4161 }, { "epoch": 0.1470093470323062, "grad_norm": 2.738781452178955, "learning_rate": 9.645361645247301e-06, "loss": 0.8705, "step": 4162 }, { "epoch": 0.1470446688360141, "grad_norm": 1.8735119104385376, "learning_rate": 9.64515002947946e-06, "loss": 0.8429, "step": 4163 }, { "epoch": 0.14707999063972202, "grad_norm": 1.9527983665466309, "learning_rate": 9.64493835291677e-06, "loss": 0.862, "step": 4164 }, { "epoch": 0.14711531244342993, "grad_norm": 2.0973141193389893, "learning_rate": 9.644726615562e-06, "loss": 0.8925, "step": 4165 }, { "epoch": 0.14715063424713784, "grad_norm": 2.1428334712982178, "learning_rate": 9.64451481741792e-06, "loss": 0.8848, "step": 4166 }, { "epoch": 0.14718595605084572, "grad_norm": 1.8010492324829102, "learning_rate": 9.644302958487303e-06, "loss": 0.8461, "step": 4167 }, { "epoch": 0.14722127785455363, "grad_norm": 2.0169761180877686, "learning_rate": 9.644091038772923e-06, "loss": 0.837, "step": 4168 }, { "epoch": 0.14725659965826154, "grad_norm": 1.841230869293213, "learning_rate": 9.643879058277553e-06, "loss": 0.8879, "step": 4169 }, { "epoch": 0.14729192146196946, "grad_norm": 2.2529711723327637, "learning_rate": 9.643667017003965e-06, "loss": 0.8591, "step": 4170 }, { "epoch": 0.14732724326567737, "grad_norm": 1.7999504804611206, "learning_rate": 9.643454914954938e-06, "loss": 0.8948, "step": 4171 }, { "epoch": 0.14736256506938528, "grad_norm": 1.7664085626602173, "learning_rate": 9.643242752133244e-06, "loss": 0.8397, "step": 4172 }, { "epoch": 0.1473978868730932, "grad_norm": 1.6656889915466309, "learning_rate": 9.643030528541662e-06, "loss": 0.9053, "step": 4173 }, { "epoch": 0.14743320867680107, "grad_norm": 2.0869174003601074, "learning_rate": 9.64281824418297e-06, "loss": 0.879, "step": 4174 }, { "epoch": 0.14746853048050898, "grad_norm": 1.7335617542266846, "learning_rate": 9.642605899059945e-06, "loss": 0.8537, "step": 4175 }, { "epoch": 0.1475038522842169, "grad_norm": 1.6736021041870117, "learning_rate": 9.642393493175366e-06, "loss": 0.8536, "step": 4176 }, { "epoch": 0.1475391740879248, "grad_norm": 1.6845418214797974, "learning_rate": 9.642181026532015e-06, "loss": 0.828, "step": 4177 }, { "epoch": 0.1475744958916327, "grad_norm": 1.8036341667175293, "learning_rate": 9.641968499132671e-06, "loss": 0.8661, "step": 4178 }, { "epoch": 0.14760981769534062, "grad_norm": 1.7774180173873901, "learning_rate": 9.641755910980115e-06, "loss": 0.8669, "step": 4179 }, { "epoch": 0.1476451394990485, "grad_norm": 1.6204582452774048, "learning_rate": 9.641543262077132e-06, "loss": 0.837, "step": 4180 }, { "epoch": 0.14768046130275642, "grad_norm": 1.8706780672073364, "learning_rate": 9.641330552426502e-06, "loss": 0.8697, "step": 4181 }, { "epoch": 0.14771578310646433, "grad_norm": 2.205007314682007, "learning_rate": 9.64111778203101e-06, "loss": 0.8612, "step": 4182 }, { "epoch": 0.14775110491017224, "grad_norm": 1.8186612129211426, "learning_rate": 9.640904950893441e-06, "loss": 0.8994, "step": 4183 }, { "epoch": 0.14778642671388015, "grad_norm": 1.6554404497146606, "learning_rate": 9.640692059016582e-06, "loss": 0.8338, "step": 4184 }, { "epoch": 0.14782174851758806, "grad_norm": 2.024911880493164, "learning_rate": 9.640479106403216e-06, "loss": 0.8833, "step": 4185 }, { "epoch": 0.14785707032129597, "grad_norm": 2.000535488128662, "learning_rate": 9.640266093056132e-06, "loss": 0.8746, "step": 4186 }, { "epoch": 0.14789239212500385, "grad_norm": 2.107736825942993, "learning_rate": 9.64005301897812e-06, "loss": 0.8758, "step": 4187 }, { "epoch": 0.14792771392871176, "grad_norm": 1.9906086921691895, "learning_rate": 9.639839884171963e-06, "loss": 0.88, "step": 4188 }, { "epoch": 0.14796303573241967, "grad_norm": 1.7279441356658936, "learning_rate": 9.639626688640455e-06, "loss": 0.8769, "step": 4189 }, { "epoch": 0.14799835753612758, "grad_norm": 1.6170886754989624, "learning_rate": 9.639413432386386e-06, "loss": 0.8482, "step": 4190 }, { "epoch": 0.1480336793398355, "grad_norm": 1.9161566495895386, "learning_rate": 9.639200115412545e-06, "loss": 0.873, "step": 4191 }, { "epoch": 0.1480690011435434, "grad_norm": 1.7860167026519775, "learning_rate": 9.638986737721725e-06, "loss": 0.844, "step": 4192 }, { "epoch": 0.1481043229472513, "grad_norm": 1.9206451177597046, "learning_rate": 9.63877329931672e-06, "loss": 0.9039, "step": 4193 }, { "epoch": 0.1481396447509592, "grad_norm": 1.9767826795578003, "learning_rate": 9.63855980020032e-06, "loss": 0.8583, "step": 4194 }, { "epoch": 0.1481749665546671, "grad_norm": 1.905028223991394, "learning_rate": 9.638346240375321e-06, "loss": 0.8482, "step": 4195 }, { "epoch": 0.14821028835837502, "grad_norm": 2.1686317920684814, "learning_rate": 9.63813261984452e-06, "loss": 0.8853, "step": 4196 }, { "epoch": 0.14824561016208293, "grad_norm": 1.9224092960357666, "learning_rate": 9.637918938610711e-06, "loss": 0.8653, "step": 4197 }, { "epoch": 0.14828093196579084, "grad_norm": 2.350771188735962, "learning_rate": 9.63770519667669e-06, "loss": 0.8979, "step": 4198 }, { "epoch": 0.14831625376949875, "grad_norm": 1.770113229751587, "learning_rate": 9.637491394045256e-06, "loss": 0.8939, "step": 4199 }, { "epoch": 0.14835157557320663, "grad_norm": 2.0632097721099854, "learning_rate": 9.637277530719205e-06, "loss": 0.8807, "step": 4200 }, { "epoch": 0.14838689737691454, "grad_norm": 1.833900809288025, "learning_rate": 9.63706360670134e-06, "loss": 0.8582, "step": 4201 }, { "epoch": 0.14842221918062246, "grad_norm": 1.9364086389541626, "learning_rate": 9.636849621994455e-06, "loss": 0.8895, "step": 4202 }, { "epoch": 0.14845754098433037, "grad_norm": 1.7846224308013916, "learning_rate": 9.636635576601356e-06, "loss": 0.896, "step": 4203 }, { "epoch": 0.14849286278803828, "grad_norm": 1.7735079526901245, "learning_rate": 9.636421470524841e-06, "loss": 0.8833, "step": 4204 }, { "epoch": 0.1485281845917462, "grad_norm": 1.5515787601470947, "learning_rate": 9.636207303767714e-06, "loss": 0.8287, "step": 4205 }, { "epoch": 0.14856350639545407, "grad_norm": 1.76618492603302, "learning_rate": 9.635993076332778e-06, "loss": 0.8488, "step": 4206 }, { "epoch": 0.14859882819916198, "grad_norm": 1.7980072498321533, "learning_rate": 9.635778788222833e-06, "loss": 0.9175, "step": 4207 }, { "epoch": 0.1486341500028699, "grad_norm": 1.7396631240844727, "learning_rate": 9.63556443944069e-06, "loss": 0.824, "step": 4208 }, { "epoch": 0.1486694718065778, "grad_norm": 1.9453026056289673, "learning_rate": 9.63535002998915e-06, "loss": 0.8704, "step": 4209 }, { "epoch": 0.1487047936102857, "grad_norm": 1.8540098667144775, "learning_rate": 9.63513555987102e-06, "loss": 0.8793, "step": 4210 }, { "epoch": 0.14874011541399362, "grad_norm": 1.6809381246566772, "learning_rate": 9.634921029089108e-06, "loss": 0.8494, "step": 4211 }, { "epoch": 0.14877543721770153, "grad_norm": 1.6081892251968384, "learning_rate": 9.634706437646219e-06, "loss": 0.8912, "step": 4212 }, { "epoch": 0.14881075902140942, "grad_norm": 1.863187551498413, "learning_rate": 9.634491785545163e-06, "loss": 0.8622, "step": 4213 }, { "epoch": 0.14884608082511733, "grad_norm": 2.018014907836914, "learning_rate": 9.634277072788752e-06, "loss": 0.8524, "step": 4214 }, { "epoch": 0.14888140262882524, "grad_norm": 1.7831326723098755, "learning_rate": 9.634062299379795e-06, "loss": 0.8925, "step": 4215 }, { "epoch": 0.14891672443253315, "grad_norm": 1.879123330116272, "learning_rate": 9.633847465321098e-06, "loss": 0.8584, "step": 4216 }, { "epoch": 0.14895204623624106, "grad_norm": 1.8539882898330688, "learning_rate": 9.633632570615478e-06, "loss": 0.8999, "step": 4217 }, { "epoch": 0.14898736803994897, "grad_norm": 1.8131500482559204, "learning_rate": 9.633417615265746e-06, "loss": 0.8726, "step": 4218 }, { "epoch": 0.14902268984365685, "grad_norm": 1.922179937362671, "learning_rate": 9.633202599274716e-06, "loss": 0.8683, "step": 4219 }, { "epoch": 0.14905801164736476, "grad_norm": 1.8965415954589844, "learning_rate": 9.6329875226452e-06, "loss": 0.8736, "step": 4220 }, { "epoch": 0.14909333345107267, "grad_norm": 1.8099663257598877, "learning_rate": 9.632772385380013e-06, "loss": 0.9189, "step": 4221 }, { "epoch": 0.14912865525478058, "grad_norm": 2.1434249877929688, "learning_rate": 9.632557187481975e-06, "loss": 0.8627, "step": 4222 }, { "epoch": 0.1491639770584885, "grad_norm": 1.8284515142440796, "learning_rate": 9.6323419289539e-06, "loss": 0.8595, "step": 4223 }, { "epoch": 0.1491992988621964, "grad_norm": 1.775956392288208, "learning_rate": 9.6321266097986e-06, "loss": 0.861, "step": 4224 }, { "epoch": 0.14923462066590432, "grad_norm": 2.0516915321350098, "learning_rate": 9.631911230018902e-06, "loss": 0.9058, "step": 4225 }, { "epoch": 0.1492699424696122, "grad_norm": 1.816937804222107, "learning_rate": 9.631695789617618e-06, "loss": 0.847, "step": 4226 }, { "epoch": 0.1493052642733201, "grad_norm": 1.852365255355835, "learning_rate": 9.63148028859757e-06, "loss": 0.8826, "step": 4227 }, { "epoch": 0.14934058607702802, "grad_norm": 1.7458996772766113, "learning_rate": 9.63126472696158e-06, "loss": 0.8712, "step": 4228 }, { "epoch": 0.14937590788073593, "grad_norm": 1.9768372774124146, "learning_rate": 9.631049104712468e-06, "loss": 0.8551, "step": 4229 }, { "epoch": 0.14941122968444384, "grad_norm": 1.8097316026687622, "learning_rate": 9.630833421853055e-06, "loss": 0.8978, "step": 4230 }, { "epoch": 0.14944655148815175, "grad_norm": 1.9696986675262451, "learning_rate": 9.630617678386165e-06, "loss": 0.8658, "step": 4231 }, { "epoch": 0.14948187329185966, "grad_norm": 1.7553335428237915, "learning_rate": 9.630401874314621e-06, "loss": 0.8659, "step": 4232 }, { "epoch": 0.14951719509556755, "grad_norm": 1.7846455574035645, "learning_rate": 9.630186009641249e-06, "loss": 0.8792, "step": 4233 }, { "epoch": 0.14955251689927546, "grad_norm": 1.6587532758712769, "learning_rate": 9.629970084368871e-06, "loss": 0.8715, "step": 4234 }, { "epoch": 0.14958783870298337, "grad_norm": 1.9610166549682617, "learning_rate": 9.629754098500316e-06, "loss": 0.8516, "step": 4235 }, { "epoch": 0.14962316050669128, "grad_norm": 1.7730354070663452, "learning_rate": 9.62953805203841e-06, "loss": 0.8877, "step": 4236 }, { "epoch": 0.1496584823103992, "grad_norm": 1.7532093524932861, "learning_rate": 9.629321944985979e-06, "loss": 0.8676, "step": 4237 }, { "epoch": 0.1496938041141071, "grad_norm": 1.8326448202133179, "learning_rate": 9.629105777345854e-06, "loss": 0.8663, "step": 4238 }, { "epoch": 0.14972912591781498, "grad_norm": 1.7477470636367798, "learning_rate": 9.628889549120862e-06, "loss": 0.8666, "step": 4239 }, { "epoch": 0.1497644477215229, "grad_norm": 1.9296362400054932, "learning_rate": 9.628673260313832e-06, "loss": 0.85, "step": 4240 }, { "epoch": 0.1497997695252308, "grad_norm": 1.7498387098312378, "learning_rate": 9.6284569109276e-06, "loss": 0.8511, "step": 4241 }, { "epoch": 0.1498350913289387, "grad_norm": 2.0012660026550293, "learning_rate": 9.628240500964992e-06, "loss": 0.8917, "step": 4242 }, { "epoch": 0.14987041313264662, "grad_norm": 1.7842707633972168, "learning_rate": 9.628024030428843e-06, "loss": 0.9226, "step": 4243 }, { "epoch": 0.14990573493635453, "grad_norm": 1.1141387224197388, "learning_rate": 9.627807499321984e-06, "loss": 0.6138, "step": 4244 }, { "epoch": 0.14994105674006244, "grad_norm": 1.7167494297027588, "learning_rate": 9.62759090764725e-06, "loss": 0.8621, "step": 4245 }, { "epoch": 0.14997637854377033, "grad_norm": 1.9351544380187988, "learning_rate": 9.627374255407479e-06, "loss": 0.9328, "step": 4246 }, { "epoch": 0.15001170034747824, "grad_norm": 2.232358694076538, "learning_rate": 9.6271575426055e-06, "loss": 0.8581, "step": 4247 }, { "epoch": 0.15004702215118615, "grad_norm": 2.0340168476104736, "learning_rate": 9.626940769244156e-06, "loss": 0.8512, "step": 4248 }, { "epoch": 0.15008234395489406, "grad_norm": 1.8196626901626587, "learning_rate": 9.62672393532628e-06, "loss": 0.9053, "step": 4249 }, { "epoch": 0.15011766575860197, "grad_norm": 1.8047161102294922, "learning_rate": 9.62650704085471e-06, "loss": 0.8177, "step": 4250 }, { "epoch": 0.15015298756230988, "grad_norm": 2.178222179412842, "learning_rate": 9.626290085832287e-06, "loss": 0.8565, "step": 4251 }, { "epoch": 0.15018830936601776, "grad_norm": 1.8618748188018799, "learning_rate": 9.626073070261848e-06, "loss": 0.9044, "step": 4252 }, { "epoch": 0.15022363116972567, "grad_norm": 1.663891315460205, "learning_rate": 9.625855994146235e-06, "loss": 0.8909, "step": 4253 }, { "epoch": 0.15025895297343358, "grad_norm": 2.23228120803833, "learning_rate": 9.625638857488287e-06, "loss": 0.8574, "step": 4254 }, { "epoch": 0.1502942747771415, "grad_norm": 1.761460304260254, "learning_rate": 9.62542166029085e-06, "loss": 0.8769, "step": 4255 }, { "epoch": 0.1503295965808494, "grad_norm": 2.140536308288574, "learning_rate": 9.62520440255676e-06, "loss": 0.8571, "step": 4256 }, { "epoch": 0.15036491838455732, "grad_norm": 1.770770788192749, "learning_rate": 9.624987084288867e-06, "loss": 0.8567, "step": 4257 }, { "epoch": 0.15040024018826523, "grad_norm": 1.7756518125534058, "learning_rate": 9.624769705490011e-06, "loss": 0.867, "step": 4258 }, { "epoch": 0.1504355619919731, "grad_norm": 1.6372802257537842, "learning_rate": 9.624552266163039e-06, "loss": 0.8622, "step": 4259 }, { "epoch": 0.15047088379568102, "grad_norm": 1.8568552732467651, "learning_rate": 9.624334766310797e-06, "loss": 0.8749, "step": 4260 }, { "epoch": 0.15050620559938893, "grad_norm": 2.0884008407592773, "learning_rate": 9.62411720593613e-06, "loss": 0.8545, "step": 4261 }, { "epoch": 0.15054152740309684, "grad_norm": 1.7517845630645752, "learning_rate": 9.623899585041886e-06, "loss": 0.8658, "step": 4262 }, { "epoch": 0.15057684920680475, "grad_norm": 1.6427762508392334, "learning_rate": 9.623681903630913e-06, "loss": 0.8517, "step": 4263 }, { "epoch": 0.15061217101051266, "grad_norm": 1.792142629623413, "learning_rate": 9.623464161706062e-06, "loss": 0.8679, "step": 4264 }, { "epoch": 0.15064749281422055, "grad_norm": 1.8534055948257446, "learning_rate": 9.62324635927018e-06, "loss": 0.8673, "step": 4265 }, { "epoch": 0.15068281461792846, "grad_norm": 1.8155707120895386, "learning_rate": 9.623028496326119e-06, "loss": 0.8709, "step": 4266 }, { "epoch": 0.15071813642163637, "grad_norm": 1.7211108207702637, "learning_rate": 9.62281057287673e-06, "loss": 0.8319, "step": 4267 }, { "epoch": 0.15075345822534428, "grad_norm": 2.2000248432159424, "learning_rate": 9.622592588924866e-06, "loss": 0.8883, "step": 4268 }, { "epoch": 0.1507887800290522, "grad_norm": 1.7979174852371216, "learning_rate": 9.622374544473378e-06, "loss": 0.8568, "step": 4269 }, { "epoch": 0.1508241018327601, "grad_norm": 2.0379817485809326, "learning_rate": 9.62215643952512e-06, "loss": 0.875, "step": 4270 }, { "epoch": 0.150859423636468, "grad_norm": 1.660081148147583, "learning_rate": 9.621938274082949e-06, "loss": 0.878, "step": 4271 }, { "epoch": 0.1508947454401759, "grad_norm": 1.8173061609268188, "learning_rate": 9.62172004814972e-06, "loss": 0.8493, "step": 4272 }, { "epoch": 0.1509300672438838, "grad_norm": 1.753940463066101, "learning_rate": 9.621501761728287e-06, "loss": 0.8438, "step": 4273 }, { "epoch": 0.1509653890475917, "grad_norm": 1.6960749626159668, "learning_rate": 9.621283414821505e-06, "loss": 0.8487, "step": 4274 }, { "epoch": 0.15100071085129962, "grad_norm": 1.6834723949432373, "learning_rate": 9.621065007432237e-06, "loss": 0.87, "step": 4275 }, { "epoch": 0.15103603265500753, "grad_norm": 1.7423902750015259, "learning_rate": 9.620846539563337e-06, "loss": 0.8268, "step": 4276 }, { "epoch": 0.15107135445871545, "grad_norm": 1.6528130769729614, "learning_rate": 9.620628011217668e-06, "loss": 0.8577, "step": 4277 }, { "epoch": 0.15110667626242333, "grad_norm": 1.7834599018096924, "learning_rate": 9.620409422398087e-06, "loss": 0.8726, "step": 4278 }, { "epoch": 0.15114199806613124, "grad_norm": 2.0067358016967773, "learning_rate": 9.620190773107458e-06, "loss": 0.8716, "step": 4279 }, { "epoch": 0.15117731986983915, "grad_norm": 1.8046404123306274, "learning_rate": 9.619972063348638e-06, "loss": 0.8668, "step": 4280 }, { "epoch": 0.15121264167354706, "grad_norm": 1.72409188747406, "learning_rate": 9.619753293124495e-06, "loss": 0.8126, "step": 4281 }, { "epoch": 0.15124796347725497, "grad_norm": 1.7528631687164307, "learning_rate": 9.619534462437886e-06, "loss": 0.8656, "step": 4282 }, { "epoch": 0.15128328528096288, "grad_norm": 1.835174322128296, "learning_rate": 9.61931557129168e-06, "loss": 0.8718, "step": 4283 }, { "epoch": 0.1513186070846708, "grad_norm": 1.7885794639587402, "learning_rate": 9.61909661968874e-06, "loss": 0.8398, "step": 4284 }, { "epoch": 0.15135392888837867, "grad_norm": 1.7483254671096802, "learning_rate": 9.618877607631932e-06, "loss": 0.8826, "step": 4285 }, { "epoch": 0.15138925069208659, "grad_norm": 1.2963749170303345, "learning_rate": 9.618658535124122e-06, "loss": 0.6145, "step": 4286 }, { "epoch": 0.1514245724957945, "grad_norm": 2.464367389678955, "learning_rate": 9.618439402168176e-06, "loss": 0.8927, "step": 4287 }, { "epoch": 0.1514598942995024, "grad_norm": 1.7422598600387573, "learning_rate": 9.618220208766966e-06, "loss": 0.8663, "step": 4288 }, { "epoch": 0.15149521610321032, "grad_norm": 2.0796730518341064, "learning_rate": 9.618000954923356e-06, "loss": 0.8628, "step": 4289 }, { "epoch": 0.15153053790691823, "grad_norm": 1.9225131273269653, "learning_rate": 9.617781640640218e-06, "loss": 0.8646, "step": 4290 }, { "epoch": 0.1515658597106261, "grad_norm": 1.9236454963684082, "learning_rate": 9.617562265920422e-06, "loss": 0.849, "step": 4291 }, { "epoch": 0.15160118151433402, "grad_norm": 1.852759599685669, "learning_rate": 9.617342830766838e-06, "loss": 0.8969, "step": 4292 }, { "epoch": 0.15163650331804193, "grad_norm": 2.001011848449707, "learning_rate": 9.617123335182341e-06, "loss": 0.8862, "step": 4293 }, { "epoch": 0.15167182512174984, "grad_norm": 1.9282381534576416, "learning_rate": 9.6169037791698e-06, "loss": 0.8673, "step": 4294 }, { "epoch": 0.15170714692545775, "grad_norm": 1.566396713256836, "learning_rate": 9.616684162732092e-06, "loss": 0.8323, "step": 4295 }, { "epoch": 0.15174246872916566, "grad_norm": 1.8976318836212158, "learning_rate": 9.616464485872088e-06, "loss": 0.8904, "step": 4296 }, { "epoch": 0.15177779053287357, "grad_norm": 2.0927183628082275, "learning_rate": 9.616244748592665e-06, "loss": 0.8383, "step": 4297 }, { "epoch": 0.15181311233658146, "grad_norm": 1.976395845413208, "learning_rate": 9.616024950896698e-06, "loss": 0.8848, "step": 4298 }, { "epoch": 0.15184843414028937, "grad_norm": 1.874821424484253, "learning_rate": 9.615805092787066e-06, "loss": 0.8898, "step": 4299 }, { "epoch": 0.15188375594399728, "grad_norm": 1.6765069961547852, "learning_rate": 9.615585174266643e-06, "loss": 0.8654, "step": 4300 }, { "epoch": 0.1519190777477052, "grad_norm": 1.7762471437454224, "learning_rate": 9.615365195338309e-06, "loss": 0.9, "step": 4301 }, { "epoch": 0.1519543995514131, "grad_norm": 1.915113925933838, "learning_rate": 9.615145156004943e-06, "loss": 0.8292, "step": 4302 }, { "epoch": 0.151989721355121, "grad_norm": 1.6841905117034912, "learning_rate": 9.614925056269427e-06, "loss": 0.8821, "step": 4303 }, { "epoch": 0.1520250431588289, "grad_norm": 1.8835194110870361, "learning_rate": 9.614704896134636e-06, "loss": 0.866, "step": 4304 }, { "epoch": 0.1520603649625368, "grad_norm": 2.2089366912841797, "learning_rate": 9.614484675603456e-06, "loss": 0.8714, "step": 4305 }, { "epoch": 0.15209568676624471, "grad_norm": 1.8721462488174438, "learning_rate": 9.614264394678768e-06, "loss": 0.8439, "step": 4306 }, { "epoch": 0.15213100856995262, "grad_norm": 1.850129246711731, "learning_rate": 9.614044053363455e-06, "loss": 0.8932, "step": 4307 }, { "epoch": 0.15216633037366054, "grad_norm": 2.0037901401519775, "learning_rate": 9.6138236516604e-06, "loss": 0.8709, "step": 4308 }, { "epoch": 0.15220165217736845, "grad_norm": 2.1222732067108154, "learning_rate": 9.61360318957249e-06, "loss": 0.8711, "step": 4309 }, { "epoch": 0.15223697398107636, "grad_norm": 1.830776572227478, "learning_rate": 9.613382667102608e-06, "loss": 0.8224, "step": 4310 }, { "epoch": 0.15227229578478424, "grad_norm": 1.983751654624939, "learning_rate": 9.613162084253641e-06, "loss": 0.8679, "step": 4311 }, { "epoch": 0.15230761758849215, "grad_norm": 1.8666260242462158, "learning_rate": 9.612941441028476e-06, "loss": 0.854, "step": 4312 }, { "epoch": 0.15234293939220006, "grad_norm": 2.115008592605591, "learning_rate": 9.612720737430001e-06, "loss": 0.8545, "step": 4313 }, { "epoch": 0.15237826119590797, "grad_norm": 2.0523743629455566, "learning_rate": 9.612499973461102e-06, "loss": 0.8798, "step": 4314 }, { "epoch": 0.15241358299961588, "grad_norm": 2.005399465560913, "learning_rate": 9.612279149124672e-06, "loss": 0.8593, "step": 4315 }, { "epoch": 0.1524489048033238, "grad_norm": 2.1633012294769287, "learning_rate": 9.612058264423599e-06, "loss": 0.9008, "step": 4316 }, { "epoch": 0.15248422660703168, "grad_norm": 1.8414483070373535, "learning_rate": 9.611837319360775e-06, "loss": 0.8736, "step": 4317 }, { "epoch": 0.15251954841073959, "grad_norm": 1.9346798658370972, "learning_rate": 9.61161631393909e-06, "loss": 0.8605, "step": 4318 }, { "epoch": 0.1525548702144475, "grad_norm": 2.6442480087280273, "learning_rate": 9.61139524816144e-06, "loss": 0.8734, "step": 4319 }, { "epoch": 0.1525901920181554, "grad_norm": 2.2213549613952637, "learning_rate": 9.611174122030713e-06, "loss": 0.9045, "step": 4320 }, { "epoch": 0.15262551382186332, "grad_norm": 1.8959709405899048, "learning_rate": 9.610952935549807e-06, "loss": 0.8577, "step": 4321 }, { "epoch": 0.15266083562557123, "grad_norm": 1.735571265220642, "learning_rate": 9.610731688721614e-06, "loss": 0.8705, "step": 4322 }, { "epoch": 0.15269615742927914, "grad_norm": 1.8065282106399536, "learning_rate": 9.610510381549035e-06, "loss": 0.8316, "step": 4323 }, { "epoch": 0.15273147923298702, "grad_norm": 1.7968443632125854, "learning_rate": 9.610289014034962e-06, "loss": 0.8759, "step": 4324 }, { "epoch": 0.15276680103669493, "grad_norm": 1.8908246755599976, "learning_rate": 9.61006758618229e-06, "loss": 0.8581, "step": 4325 }, { "epoch": 0.15280212284040284, "grad_norm": 2.1930713653564453, "learning_rate": 9.609846097993923e-06, "loss": 0.8912, "step": 4326 }, { "epoch": 0.15283744464411075, "grad_norm": 2.3572351932525635, "learning_rate": 9.609624549472756e-06, "loss": 0.8628, "step": 4327 }, { "epoch": 0.15287276644781866, "grad_norm": 1.7894325256347656, "learning_rate": 9.609402940621687e-06, "loss": 0.8552, "step": 4328 }, { "epoch": 0.15290808825152657, "grad_norm": 1.7668401002883911, "learning_rate": 9.609181271443623e-06, "loss": 0.8585, "step": 4329 }, { "epoch": 0.15294341005523446, "grad_norm": 1.9221385717391968, "learning_rate": 9.608959541941458e-06, "loss": 0.8729, "step": 4330 }, { "epoch": 0.15297873185894237, "grad_norm": 1.9438406229019165, "learning_rate": 9.608737752118097e-06, "loss": 0.8898, "step": 4331 }, { "epoch": 0.15301405366265028, "grad_norm": 1.8950620889663696, "learning_rate": 9.608515901976445e-06, "loss": 0.8926, "step": 4332 }, { "epoch": 0.1530493754663582, "grad_norm": 2.0145885944366455, "learning_rate": 9.608293991519402e-06, "loss": 0.8677, "step": 4333 }, { "epoch": 0.1530846972700661, "grad_norm": 1.7973233461380005, "learning_rate": 9.608072020749873e-06, "loss": 0.9071, "step": 4334 }, { "epoch": 0.153120019073774, "grad_norm": 1.8420689105987549, "learning_rate": 9.607849989670763e-06, "loss": 0.8627, "step": 4335 }, { "epoch": 0.15315534087748192, "grad_norm": 1.7703673839569092, "learning_rate": 9.60762789828498e-06, "loss": 0.8507, "step": 4336 }, { "epoch": 0.1531906626811898, "grad_norm": 1.7325351238250732, "learning_rate": 9.607405746595429e-06, "loss": 0.8372, "step": 4337 }, { "epoch": 0.15322598448489771, "grad_norm": 2.660083770751953, "learning_rate": 9.607183534605016e-06, "loss": 0.85, "step": 4338 }, { "epoch": 0.15326130628860563, "grad_norm": 1.8747621774673462, "learning_rate": 9.606961262316654e-06, "loss": 0.8865, "step": 4339 }, { "epoch": 0.15329662809231354, "grad_norm": 2.061074733734131, "learning_rate": 9.606738929733246e-06, "loss": 0.8977, "step": 4340 }, { "epoch": 0.15333194989602145, "grad_norm": 2.2830312252044678, "learning_rate": 9.606516536857706e-06, "loss": 0.8685, "step": 4341 }, { "epoch": 0.15336727169972936, "grad_norm": 2.018030881881714, "learning_rate": 9.606294083692944e-06, "loss": 0.8564, "step": 4342 }, { "epoch": 0.15340259350343724, "grad_norm": 1.6740177869796753, "learning_rate": 9.60607157024187e-06, "loss": 0.834, "step": 4343 }, { "epoch": 0.15343791530714515, "grad_norm": 1.7213717699050903, "learning_rate": 9.605848996507398e-06, "loss": 0.8629, "step": 4344 }, { "epoch": 0.15347323711085306, "grad_norm": 1.8595291376113892, "learning_rate": 9.60562636249244e-06, "loss": 0.8714, "step": 4345 }, { "epoch": 0.15350855891456097, "grad_norm": 1.6998399496078491, "learning_rate": 9.605403668199909e-06, "loss": 0.8293, "step": 4346 }, { "epoch": 0.15354388071826888, "grad_norm": 1.7241970300674438, "learning_rate": 9.605180913632721e-06, "loss": 0.9038, "step": 4347 }, { "epoch": 0.1535792025219768, "grad_norm": 1.9139065742492676, "learning_rate": 9.604958098793792e-06, "loss": 0.8724, "step": 4348 }, { "epoch": 0.1536145243256847, "grad_norm": 1.7518357038497925, "learning_rate": 9.604735223686037e-06, "loss": 0.8861, "step": 4349 }, { "epoch": 0.1536498461293926, "grad_norm": 1.7899346351623535, "learning_rate": 9.60451228831237e-06, "loss": 0.8925, "step": 4350 }, { "epoch": 0.1536851679331005, "grad_norm": 1.7538809776306152, "learning_rate": 9.604289292675715e-06, "loss": 0.8645, "step": 4351 }, { "epoch": 0.1537204897368084, "grad_norm": 1.7200145721435547, "learning_rate": 9.604066236778986e-06, "loss": 0.8622, "step": 4352 }, { "epoch": 0.15375581154051632, "grad_norm": 1.5662777423858643, "learning_rate": 9.603843120625104e-06, "loss": 0.8597, "step": 4353 }, { "epoch": 0.15379113334422423, "grad_norm": 1.6736969947814941, "learning_rate": 9.603619944216989e-06, "loss": 0.877, "step": 4354 }, { "epoch": 0.15382645514793214, "grad_norm": 1.5963395833969116, "learning_rate": 9.603396707557559e-06, "loss": 0.8456, "step": 4355 }, { "epoch": 0.15386177695164002, "grad_norm": 1.7197498083114624, "learning_rate": 9.603173410649741e-06, "loss": 0.8494, "step": 4356 }, { "epoch": 0.15389709875534793, "grad_norm": 1.7767621278762817, "learning_rate": 9.602950053496454e-06, "loss": 0.8613, "step": 4357 }, { "epoch": 0.15393242055905584, "grad_norm": 1.7506028413772583, "learning_rate": 9.602726636100622e-06, "loss": 0.8263, "step": 4358 }, { "epoch": 0.15396774236276375, "grad_norm": 1.870463252067566, "learning_rate": 9.602503158465168e-06, "loss": 0.8652, "step": 4359 }, { "epoch": 0.15400306416647166, "grad_norm": 2.006293535232544, "learning_rate": 9.60227962059302e-06, "loss": 0.853, "step": 4360 }, { "epoch": 0.15403838597017958, "grad_norm": 1.6053069829940796, "learning_rate": 9.602056022487099e-06, "loss": 0.8631, "step": 4361 }, { "epoch": 0.15407370777388749, "grad_norm": 1.7321728467941284, "learning_rate": 9.601832364150334e-06, "loss": 0.8841, "step": 4362 }, { "epoch": 0.15410902957759537, "grad_norm": 1.7039613723754883, "learning_rate": 9.601608645585654e-06, "loss": 0.8804, "step": 4363 }, { "epoch": 0.15414435138130328, "grad_norm": 1.6890380382537842, "learning_rate": 9.601384866795985e-06, "loss": 0.8512, "step": 4364 }, { "epoch": 0.1541796731850112, "grad_norm": 1.960037350654602, "learning_rate": 9.601161027784253e-06, "loss": 0.8734, "step": 4365 }, { "epoch": 0.1542149949887191, "grad_norm": 1.9207834005355835, "learning_rate": 9.600937128553394e-06, "loss": 0.8811, "step": 4366 }, { "epoch": 0.154250316792427, "grad_norm": 1.9747179746627808, "learning_rate": 9.600713169106333e-06, "loss": 0.857, "step": 4367 }, { "epoch": 0.15428563859613492, "grad_norm": 1.8665841817855835, "learning_rate": 9.600489149446001e-06, "loss": 0.856, "step": 4368 }, { "epoch": 0.1543209603998428, "grad_norm": 1.8730450868606567, "learning_rate": 9.600265069575335e-06, "loss": 0.8857, "step": 4369 }, { "epoch": 0.15435628220355072, "grad_norm": 1.8793412446975708, "learning_rate": 9.600040929497264e-06, "loss": 0.9035, "step": 4370 }, { "epoch": 0.15439160400725863, "grad_norm": 1.6587965488433838, "learning_rate": 9.599816729214721e-06, "loss": 0.8461, "step": 4371 }, { "epoch": 0.15442692581096654, "grad_norm": 1.641273856163025, "learning_rate": 9.59959246873064e-06, "loss": 0.8822, "step": 4372 }, { "epoch": 0.15446224761467445, "grad_norm": 1.7643626928329468, "learning_rate": 9.59936814804796e-06, "loss": 0.8551, "step": 4373 }, { "epoch": 0.15449756941838236, "grad_norm": 1.9169293642044067, "learning_rate": 9.599143767169613e-06, "loss": 0.8767, "step": 4374 }, { "epoch": 0.15453289122209027, "grad_norm": 1.7369848489761353, "learning_rate": 9.598919326098538e-06, "loss": 0.8636, "step": 4375 }, { "epoch": 0.15456821302579815, "grad_norm": 1.700812816619873, "learning_rate": 9.59869482483767e-06, "loss": 0.8299, "step": 4376 }, { "epoch": 0.15460353482950606, "grad_norm": 1.7048475742340088, "learning_rate": 9.59847026338995e-06, "loss": 0.8532, "step": 4377 }, { "epoch": 0.15463885663321397, "grad_norm": 1.918250560760498, "learning_rate": 9.598245641758314e-06, "loss": 0.9258, "step": 4378 }, { "epoch": 0.15467417843692188, "grad_norm": 1.723541498184204, "learning_rate": 9.598020959945704e-06, "loss": 0.86, "step": 4379 }, { "epoch": 0.1547095002406298, "grad_norm": 2.4545204639434814, "learning_rate": 9.59779621795506e-06, "loss": 0.8475, "step": 4380 }, { "epoch": 0.1547448220443377, "grad_norm": 1.709202766418457, "learning_rate": 9.597571415789324e-06, "loss": 0.8706, "step": 4381 }, { "epoch": 0.1547801438480456, "grad_norm": 1.8406013250350952, "learning_rate": 9.597346553451436e-06, "loss": 0.8748, "step": 4382 }, { "epoch": 0.1548154656517535, "grad_norm": 1.8064457178115845, "learning_rate": 9.597121630944342e-06, "loss": 0.879, "step": 4383 }, { "epoch": 0.1548507874554614, "grad_norm": 1.7015917301177979, "learning_rate": 9.596896648270983e-06, "loss": 0.8712, "step": 4384 }, { "epoch": 0.15488610925916932, "grad_norm": 1.7132011651992798, "learning_rate": 9.596671605434307e-06, "loss": 0.888, "step": 4385 }, { "epoch": 0.15492143106287723, "grad_norm": 1.681606650352478, "learning_rate": 9.596446502437255e-06, "loss": 0.9082, "step": 4386 }, { "epoch": 0.15495675286658514, "grad_norm": 1.710321068763733, "learning_rate": 9.596221339282776e-06, "loss": 0.8586, "step": 4387 }, { "epoch": 0.15499207467029305, "grad_norm": 1.795348048210144, "learning_rate": 9.595996115973817e-06, "loss": 0.8321, "step": 4388 }, { "epoch": 0.15502739647400093, "grad_norm": 1.8905081748962402, "learning_rate": 9.595770832513324e-06, "loss": 0.87, "step": 4389 }, { "epoch": 0.15506271827770884, "grad_norm": 1.7136895656585693, "learning_rate": 9.595545488904245e-06, "loss": 0.8755, "step": 4390 }, { "epoch": 0.15509804008141675, "grad_norm": 1.6764940023422241, "learning_rate": 9.595320085149533e-06, "loss": 0.8862, "step": 4391 }, { "epoch": 0.15513336188512467, "grad_norm": 1.6124411821365356, "learning_rate": 9.595094621252133e-06, "loss": 0.8575, "step": 4392 }, { "epoch": 0.15516868368883258, "grad_norm": 1.2817294597625732, "learning_rate": 9.594869097215e-06, "loss": 0.6152, "step": 4393 }, { "epoch": 0.1552040054925405, "grad_norm": 1.7064894437789917, "learning_rate": 9.594643513041084e-06, "loss": 0.86, "step": 4394 }, { "epoch": 0.15523932729624837, "grad_norm": 1.6624904870986938, "learning_rate": 9.594417868733337e-06, "loss": 0.8463, "step": 4395 }, { "epoch": 0.15527464909995628, "grad_norm": 1.8824321031570435, "learning_rate": 9.594192164294712e-06, "loss": 0.8726, "step": 4396 }, { "epoch": 0.1553099709036642, "grad_norm": 2.1195003986358643, "learning_rate": 9.593966399728163e-06, "loss": 0.8737, "step": 4397 }, { "epoch": 0.1553452927073721, "grad_norm": 1.8235958814620972, "learning_rate": 9.593740575036648e-06, "loss": 0.8907, "step": 4398 }, { "epoch": 0.15538061451108, "grad_norm": 0.9951743483543396, "learning_rate": 9.593514690223117e-06, "loss": 0.6106, "step": 4399 }, { "epoch": 0.15541593631478792, "grad_norm": 1.8669321537017822, "learning_rate": 9.593288745290532e-06, "loss": 0.8569, "step": 4400 }, { "epoch": 0.15545125811849583, "grad_norm": 1.8906532526016235, "learning_rate": 9.593062740241844e-06, "loss": 0.8863, "step": 4401 }, { "epoch": 0.15548657992220372, "grad_norm": 1.919505000114441, "learning_rate": 9.592836675080017e-06, "loss": 0.8351, "step": 4402 }, { "epoch": 0.15552190172591163, "grad_norm": 2.249021530151367, "learning_rate": 9.592610549808006e-06, "loss": 0.896, "step": 4403 }, { "epoch": 0.15555722352961954, "grad_norm": 1.8819218873977661, "learning_rate": 9.592384364428773e-06, "loss": 0.8549, "step": 4404 }, { "epoch": 0.15559254533332745, "grad_norm": 1.947737693786621, "learning_rate": 9.592158118945274e-06, "loss": 0.8911, "step": 4405 }, { "epoch": 0.15562786713703536, "grad_norm": 1.691322684288025, "learning_rate": 9.591931813360474e-06, "loss": 0.897, "step": 4406 }, { "epoch": 0.15566318894074327, "grad_norm": 1.7615035772323608, "learning_rate": 9.591705447677335e-06, "loss": 0.8723, "step": 4407 }, { "epoch": 0.15569851074445115, "grad_norm": 1.658043384552002, "learning_rate": 9.591479021898816e-06, "loss": 0.8531, "step": 4408 }, { "epoch": 0.15573383254815906, "grad_norm": 1.7785112857818604, "learning_rate": 9.591252536027885e-06, "loss": 0.8402, "step": 4409 }, { "epoch": 0.15576915435186697, "grad_norm": 1.8212082386016846, "learning_rate": 9.591025990067502e-06, "loss": 0.8292, "step": 4410 }, { "epoch": 0.15580447615557488, "grad_norm": 1.7409096956253052, "learning_rate": 9.590799384020634e-06, "loss": 0.8746, "step": 4411 }, { "epoch": 0.1558397979592828, "grad_norm": 1.7812458276748657, "learning_rate": 9.590572717890246e-06, "loss": 0.8745, "step": 4412 }, { "epoch": 0.1558751197629907, "grad_norm": 1.883881688117981, "learning_rate": 9.590345991679307e-06, "loss": 0.9176, "step": 4413 }, { "epoch": 0.15591044156669862, "grad_norm": 1.8798620700836182, "learning_rate": 9.590119205390782e-06, "loss": 0.8396, "step": 4414 }, { "epoch": 0.1559457633704065, "grad_norm": 1.7137930393218994, "learning_rate": 9.58989235902764e-06, "loss": 0.8615, "step": 4415 }, { "epoch": 0.1559810851741144, "grad_norm": 1.723228096961975, "learning_rate": 9.589665452592848e-06, "loss": 0.9185, "step": 4416 }, { "epoch": 0.15601640697782232, "grad_norm": 1.7279564142227173, "learning_rate": 9.589438486089378e-06, "loss": 0.8626, "step": 4417 }, { "epoch": 0.15605172878153023, "grad_norm": 1.8085156679153442, "learning_rate": 9.5892114595202e-06, "loss": 0.9012, "step": 4418 }, { "epoch": 0.15608705058523814, "grad_norm": 1.6712431907653809, "learning_rate": 9.588984372888286e-06, "loss": 0.9074, "step": 4419 }, { "epoch": 0.15612237238894605, "grad_norm": 2.4986956119537354, "learning_rate": 9.588757226196606e-06, "loss": 0.8471, "step": 4420 }, { "epoch": 0.15615769419265393, "grad_norm": 1.7831740379333496, "learning_rate": 9.588530019448134e-06, "loss": 0.8791, "step": 4421 }, { "epoch": 0.15619301599636184, "grad_norm": 1.960203766822815, "learning_rate": 9.588302752645843e-06, "loss": 0.914, "step": 4422 }, { "epoch": 0.15622833780006976, "grad_norm": 1.8358960151672363, "learning_rate": 9.588075425792711e-06, "loss": 0.8403, "step": 4423 }, { "epoch": 0.15626365960377767, "grad_norm": 1.845170497894287, "learning_rate": 9.587848038891707e-06, "loss": 0.8455, "step": 4424 }, { "epoch": 0.15629898140748558, "grad_norm": 1.9860097169876099, "learning_rate": 9.587620591945812e-06, "loss": 0.9068, "step": 4425 }, { "epoch": 0.1563343032111935, "grad_norm": 1.7774510383605957, "learning_rate": 9.587393084958e-06, "loss": 0.8819, "step": 4426 }, { "epoch": 0.1563696250149014, "grad_norm": 1.7397263050079346, "learning_rate": 9.587165517931251e-06, "loss": 0.8522, "step": 4427 }, { "epoch": 0.15640494681860928, "grad_norm": 1.8054887056350708, "learning_rate": 9.586937890868541e-06, "loss": 0.8847, "step": 4428 }, { "epoch": 0.1564402686223172, "grad_norm": 1.9351413249969482, "learning_rate": 9.58671020377285e-06, "loss": 0.8354, "step": 4429 }, { "epoch": 0.1564755904260251, "grad_norm": 2.0944764614105225, "learning_rate": 9.58648245664716e-06, "loss": 0.9043, "step": 4430 }, { "epoch": 0.156510912229733, "grad_norm": 1.689970850944519, "learning_rate": 9.586254649494448e-06, "loss": 0.8622, "step": 4431 }, { "epoch": 0.15654623403344092, "grad_norm": 1.6894078254699707, "learning_rate": 9.5860267823177e-06, "loss": 0.86, "step": 4432 }, { "epoch": 0.15658155583714883, "grad_norm": 1.8853543996810913, "learning_rate": 9.585798855119893e-06, "loss": 0.8382, "step": 4433 }, { "epoch": 0.15661687764085672, "grad_norm": 1.7890903949737549, "learning_rate": 9.585570867904012e-06, "loss": 0.8221, "step": 4434 }, { "epoch": 0.15665219944456463, "grad_norm": 1.7906872034072876, "learning_rate": 9.585342820673042e-06, "loss": 0.8667, "step": 4435 }, { "epoch": 0.15668752124827254, "grad_norm": 1.8984020948410034, "learning_rate": 9.585114713429967e-06, "loss": 0.873, "step": 4436 }, { "epoch": 0.15672284305198045, "grad_norm": 2.1266865730285645, "learning_rate": 9.584886546177775e-06, "loss": 0.8566, "step": 4437 }, { "epoch": 0.15675816485568836, "grad_norm": 1.7521450519561768, "learning_rate": 9.584658318919446e-06, "loss": 0.8907, "step": 4438 }, { "epoch": 0.15679348665939627, "grad_norm": 1.6898155212402344, "learning_rate": 9.584430031657971e-06, "loss": 0.87, "step": 4439 }, { "epoch": 0.15682880846310418, "grad_norm": 1.8161892890930176, "learning_rate": 9.58420168439634e-06, "loss": 0.8878, "step": 4440 }, { "epoch": 0.15686413026681206, "grad_norm": 1.8108662366867065, "learning_rate": 9.583973277137538e-06, "loss": 0.882, "step": 4441 }, { "epoch": 0.15689945207051997, "grad_norm": 1.7723618745803833, "learning_rate": 9.583744809884557e-06, "loss": 0.8574, "step": 4442 }, { "epoch": 0.15693477387422788, "grad_norm": 1.8718547821044922, "learning_rate": 9.583516282640383e-06, "loss": 0.8548, "step": 4443 }, { "epoch": 0.1569700956779358, "grad_norm": 2.0727956295013428, "learning_rate": 9.58328769540801e-06, "loss": 0.8435, "step": 4444 }, { "epoch": 0.1570054174816437, "grad_norm": 1.6261919736862183, "learning_rate": 9.583059048190431e-06, "loss": 0.8264, "step": 4445 }, { "epoch": 0.15704073928535162, "grad_norm": 1.8055901527404785, "learning_rate": 9.582830340990634e-06, "loss": 0.8835, "step": 4446 }, { "epoch": 0.15707606108905953, "grad_norm": 3.090034008026123, "learning_rate": 9.582601573811618e-06, "loss": 0.8845, "step": 4447 }, { "epoch": 0.1571113828927674, "grad_norm": 1.7126381397247314, "learning_rate": 9.582372746656372e-06, "loss": 0.8843, "step": 4448 }, { "epoch": 0.15714670469647532, "grad_norm": 1.781502366065979, "learning_rate": 9.582143859527893e-06, "loss": 0.8479, "step": 4449 }, { "epoch": 0.15718202650018323, "grad_norm": 1.90943443775177, "learning_rate": 9.581914912429178e-06, "loss": 0.8651, "step": 4450 }, { "epoch": 0.15721734830389114, "grad_norm": 1.8078256845474243, "learning_rate": 9.58168590536322e-06, "loss": 0.8694, "step": 4451 }, { "epoch": 0.15725267010759905, "grad_norm": 1.7512556314468384, "learning_rate": 9.581456838333019e-06, "loss": 0.887, "step": 4452 }, { "epoch": 0.15728799191130696, "grad_norm": 1.9892905950546265, "learning_rate": 9.581227711341572e-06, "loss": 0.8678, "step": 4453 }, { "epoch": 0.15732331371501485, "grad_norm": 1.70060396194458, "learning_rate": 9.580998524391879e-06, "loss": 0.9122, "step": 4454 }, { "epoch": 0.15735863551872276, "grad_norm": 1.7228108644485474, "learning_rate": 9.580769277486936e-06, "loss": 0.87, "step": 4455 }, { "epoch": 0.15739395732243067, "grad_norm": 1.1233062744140625, "learning_rate": 9.580539970629748e-06, "loss": 0.5753, "step": 4456 }, { "epoch": 0.15742927912613858, "grad_norm": 2.1606123447418213, "learning_rate": 9.580310603823313e-06, "loss": 0.85, "step": 4457 }, { "epoch": 0.1574646009298465, "grad_norm": 1.8514416217803955, "learning_rate": 9.580081177070633e-06, "loss": 0.8491, "step": 4458 }, { "epoch": 0.1574999227335544, "grad_norm": 2.0261874198913574, "learning_rate": 9.579851690374712e-06, "loss": 0.8611, "step": 4459 }, { "epoch": 0.1575352445372623, "grad_norm": 1.6449732780456543, "learning_rate": 9.579622143738554e-06, "loss": 0.8525, "step": 4460 }, { "epoch": 0.1575705663409702, "grad_norm": 1.7725682258605957, "learning_rate": 9.579392537165163e-06, "loss": 0.8589, "step": 4461 }, { "epoch": 0.1576058881446781, "grad_norm": 1.7993009090423584, "learning_rate": 9.57916287065754e-06, "loss": 0.8755, "step": 4462 }, { "epoch": 0.157641209948386, "grad_norm": 1.7474640607833862, "learning_rate": 9.578933144218697e-06, "loss": 0.8731, "step": 4463 }, { "epoch": 0.15767653175209392, "grad_norm": 2.0632965564727783, "learning_rate": 9.578703357851637e-06, "loss": 0.8532, "step": 4464 }, { "epoch": 0.15771185355580183, "grad_norm": 1.7503987550735474, "learning_rate": 9.578473511559368e-06, "loss": 0.8681, "step": 4465 }, { "epoch": 0.15774717535950974, "grad_norm": 1.9289714097976685, "learning_rate": 9.5782436053449e-06, "loss": 0.8821, "step": 4466 }, { "epoch": 0.15778249716321763, "grad_norm": 1.935007929801941, "learning_rate": 9.578013639211236e-06, "loss": 0.8754, "step": 4467 }, { "epoch": 0.15781781896692554, "grad_norm": 1.7596385478973389, "learning_rate": 9.577783613161393e-06, "loss": 0.8761, "step": 4468 }, { "epoch": 0.15785314077063345, "grad_norm": 1.698028326034546, "learning_rate": 9.57755352719838e-06, "loss": 0.8535, "step": 4469 }, { "epoch": 0.15788846257434136, "grad_norm": 1.9278217554092407, "learning_rate": 9.577323381325205e-06, "loss": 0.8689, "step": 4470 }, { "epoch": 0.15792378437804927, "grad_norm": 1.6470224857330322, "learning_rate": 9.577093175544882e-06, "loss": 0.843, "step": 4471 }, { "epoch": 0.15795910618175718, "grad_norm": 1.0427697896957397, "learning_rate": 9.576862909860424e-06, "loss": 0.593, "step": 4472 }, { "epoch": 0.1579944279854651, "grad_norm": 1.7853769063949585, "learning_rate": 9.576632584274845e-06, "loss": 0.847, "step": 4473 }, { "epoch": 0.15802974978917297, "grad_norm": 1.6653531789779663, "learning_rate": 9.57640219879116e-06, "loss": 0.8331, "step": 4474 }, { "epoch": 0.15806507159288088, "grad_norm": 1.5744684934616089, "learning_rate": 9.576171753412382e-06, "loss": 0.841, "step": 4475 }, { "epoch": 0.1581003933965888, "grad_norm": 1.86892831325531, "learning_rate": 9.575941248141527e-06, "loss": 0.8657, "step": 4476 }, { "epoch": 0.1581357152002967, "grad_norm": 1.703787922859192, "learning_rate": 9.575710682981616e-06, "loss": 0.8683, "step": 4477 }, { "epoch": 0.15817103700400462, "grad_norm": 1.7173309326171875, "learning_rate": 9.575480057935661e-06, "loss": 0.8597, "step": 4478 }, { "epoch": 0.15820635880771253, "grad_norm": 1.642771601676941, "learning_rate": 9.575249373006686e-06, "loss": 0.8612, "step": 4479 }, { "epoch": 0.1582416806114204, "grad_norm": 1.7401450872421265, "learning_rate": 9.575018628197706e-06, "loss": 0.8646, "step": 4480 }, { "epoch": 0.15827700241512832, "grad_norm": 2.2552502155303955, "learning_rate": 9.57478782351174e-06, "loss": 0.8591, "step": 4481 }, { "epoch": 0.15831232421883623, "grad_norm": 1.8336907625198364, "learning_rate": 9.574556958951813e-06, "loss": 0.8721, "step": 4482 }, { "epoch": 0.15834764602254414, "grad_norm": 1.5907870531082153, "learning_rate": 9.574326034520945e-06, "loss": 0.8866, "step": 4483 }, { "epoch": 0.15838296782625205, "grad_norm": 1.857277750968933, "learning_rate": 9.574095050222156e-06, "loss": 0.8703, "step": 4484 }, { "epoch": 0.15841828962995996, "grad_norm": 1.6237459182739258, "learning_rate": 9.573864006058472e-06, "loss": 0.8656, "step": 4485 }, { "epoch": 0.15845361143366787, "grad_norm": 9.603873252868652, "learning_rate": 9.573632902032916e-06, "loss": 0.8809, "step": 4486 }, { "epoch": 0.15848893323737576, "grad_norm": 1.925521731376648, "learning_rate": 9.573401738148512e-06, "loss": 0.8766, "step": 4487 }, { "epoch": 0.15852425504108367, "grad_norm": 2.9832983016967773, "learning_rate": 9.573170514408286e-06, "loss": 0.8832, "step": 4488 }, { "epoch": 0.15855957684479158, "grad_norm": 1.8402671813964844, "learning_rate": 9.572939230815263e-06, "loss": 0.9004, "step": 4489 }, { "epoch": 0.1585948986484995, "grad_norm": 1.7341591119766235, "learning_rate": 9.572707887372473e-06, "loss": 0.8629, "step": 4490 }, { "epoch": 0.1586302204522074, "grad_norm": 1.8944989442825317, "learning_rate": 9.572476484082939e-06, "loss": 0.9096, "step": 4491 }, { "epoch": 0.1586655422559153, "grad_norm": 1.8780531883239746, "learning_rate": 9.572245020949692e-06, "loss": 0.8923, "step": 4492 }, { "epoch": 0.1587008640596232, "grad_norm": 1.794226884841919, "learning_rate": 9.572013497975765e-06, "loss": 0.8839, "step": 4493 }, { "epoch": 0.1587361858633311, "grad_norm": 1.6030054092407227, "learning_rate": 9.571781915164181e-06, "loss": 0.8809, "step": 4494 }, { "epoch": 0.158771507667039, "grad_norm": 1.9235244989395142, "learning_rate": 9.571550272517978e-06, "loss": 0.8977, "step": 4495 }, { "epoch": 0.15880682947074692, "grad_norm": 1.823507308959961, "learning_rate": 9.571318570040184e-06, "loss": 0.8864, "step": 4496 }, { "epoch": 0.15884215127445483, "grad_norm": 1.6630219221115112, "learning_rate": 9.571086807733831e-06, "loss": 0.8778, "step": 4497 }, { "epoch": 0.15887747307816275, "grad_norm": 1.7601138353347778, "learning_rate": 9.570854985601951e-06, "loss": 0.8615, "step": 4498 }, { "epoch": 0.15891279488187066, "grad_norm": 1.9041982889175415, "learning_rate": 9.570623103647584e-06, "loss": 0.8832, "step": 4499 }, { "epoch": 0.15894811668557854, "grad_norm": 1.9966768026351929, "learning_rate": 9.57039116187376e-06, "loss": 0.8726, "step": 4500 }, { "epoch": 0.15898343848928645, "grad_norm": 1.7773845195770264, "learning_rate": 9.570159160283514e-06, "loss": 0.8523, "step": 4501 }, { "epoch": 0.15901876029299436, "grad_norm": 1.6587415933609009, "learning_rate": 9.569927098879885e-06, "loss": 0.8574, "step": 4502 }, { "epoch": 0.15905408209670227, "grad_norm": 1.6499435901641846, "learning_rate": 9.569694977665908e-06, "loss": 0.8641, "step": 4503 }, { "epoch": 0.15908940390041018, "grad_norm": 1.6502742767333984, "learning_rate": 9.569462796644624e-06, "loss": 0.8557, "step": 4504 }, { "epoch": 0.1591247257041181, "grad_norm": 1.757794737815857, "learning_rate": 9.56923055581907e-06, "loss": 0.8476, "step": 4505 }, { "epoch": 0.15916004750782597, "grad_norm": 1.844215750694275, "learning_rate": 9.568998255192283e-06, "loss": 0.8471, "step": 4506 }, { "epoch": 0.15919536931153389, "grad_norm": 1.6849788427352905, "learning_rate": 9.568765894767308e-06, "loss": 0.8564, "step": 4507 }, { "epoch": 0.1592306911152418, "grad_norm": 1.6865044832229614, "learning_rate": 9.568533474547183e-06, "loss": 0.8638, "step": 4508 }, { "epoch": 0.1592660129189497, "grad_norm": 1.625893235206604, "learning_rate": 9.56830099453495e-06, "loss": 0.8367, "step": 4509 }, { "epoch": 0.15930133472265762, "grad_norm": 1.7076669931411743, "learning_rate": 9.568068454733654e-06, "loss": 0.8665, "step": 4510 }, { "epoch": 0.15933665652636553, "grad_norm": 1.5384149551391602, "learning_rate": 9.567835855146336e-06, "loss": 0.8159, "step": 4511 }, { "epoch": 0.15937197833007344, "grad_norm": 1.7454938888549805, "learning_rate": 9.567603195776041e-06, "loss": 0.9004, "step": 4512 }, { "epoch": 0.15940730013378132, "grad_norm": 1.6462771892547607, "learning_rate": 9.567370476625813e-06, "loss": 0.8566, "step": 4513 }, { "epoch": 0.15944262193748923, "grad_norm": 1.6661007404327393, "learning_rate": 9.567137697698699e-06, "loss": 0.8834, "step": 4514 }, { "epoch": 0.15947794374119714, "grad_norm": 1.8077671527862549, "learning_rate": 9.566904858997746e-06, "loss": 0.8488, "step": 4515 }, { "epoch": 0.15951326554490505, "grad_norm": 1.7636727094650269, "learning_rate": 9.566671960526001e-06, "loss": 0.8929, "step": 4516 }, { "epoch": 0.15954858734861296, "grad_norm": 1.801389455795288, "learning_rate": 9.566439002286512e-06, "loss": 0.8832, "step": 4517 }, { "epoch": 0.15958390915232087, "grad_norm": 1.800388216972351, "learning_rate": 9.566205984282326e-06, "loss": 0.8648, "step": 4518 }, { "epoch": 0.15961923095602876, "grad_norm": 1.1094145774841309, "learning_rate": 9.565972906516496e-06, "loss": 0.6283, "step": 4519 }, { "epoch": 0.15965455275973667, "grad_norm": 1.9697439670562744, "learning_rate": 9.56573976899207e-06, "loss": 0.9062, "step": 4520 }, { "epoch": 0.15968987456344458, "grad_norm": 1.8231598138809204, "learning_rate": 9.565506571712103e-06, "loss": 0.9238, "step": 4521 }, { "epoch": 0.1597251963671525, "grad_norm": 1.8037866353988647, "learning_rate": 9.565273314679639e-06, "loss": 0.8297, "step": 4522 }, { "epoch": 0.1597605181708604, "grad_norm": 1.8720519542694092, "learning_rate": 9.56503999789774e-06, "loss": 0.8513, "step": 4523 }, { "epoch": 0.1597958399745683, "grad_norm": 1.689247965812683, "learning_rate": 9.564806621369455e-06, "loss": 0.8382, "step": 4524 }, { "epoch": 0.15983116177827622, "grad_norm": 1.6921017169952393, "learning_rate": 9.564573185097839e-06, "loss": 0.8628, "step": 4525 }, { "epoch": 0.1598664835819841, "grad_norm": 1.671445608139038, "learning_rate": 9.564339689085947e-06, "loss": 0.8518, "step": 4526 }, { "epoch": 0.15990180538569201, "grad_norm": 1.870784044265747, "learning_rate": 9.564106133336835e-06, "loss": 0.8898, "step": 4527 }, { "epoch": 0.15993712718939992, "grad_norm": 2.0065574645996094, "learning_rate": 9.563872517853559e-06, "loss": 0.8534, "step": 4528 }, { "epoch": 0.15997244899310784, "grad_norm": 1.7558553218841553, "learning_rate": 9.563638842639177e-06, "loss": 0.8682, "step": 4529 }, { "epoch": 0.16000777079681575, "grad_norm": 1.8284600973129272, "learning_rate": 9.56340510769675e-06, "loss": 0.8589, "step": 4530 }, { "epoch": 0.16004309260052366, "grad_norm": 1.7325495481491089, "learning_rate": 9.563171313029334e-06, "loss": 0.8855, "step": 4531 }, { "epoch": 0.16007841440423154, "grad_norm": 1.7652751207351685, "learning_rate": 9.56293745863999e-06, "loss": 0.8594, "step": 4532 }, { "epoch": 0.16011373620793945, "grad_norm": 1.9750179052352905, "learning_rate": 9.562703544531777e-06, "loss": 0.8336, "step": 4533 }, { "epoch": 0.16014905801164736, "grad_norm": 1.9160971641540527, "learning_rate": 9.562469570707758e-06, "loss": 0.9034, "step": 4534 }, { "epoch": 0.16018437981535527, "grad_norm": 1.7924457788467407, "learning_rate": 9.562235537170996e-06, "loss": 0.8836, "step": 4535 }, { "epoch": 0.16021970161906318, "grad_norm": 2.230205535888672, "learning_rate": 9.56200144392455e-06, "loss": 0.8938, "step": 4536 }, { "epoch": 0.1602550234227711, "grad_norm": 1.9920512437820435, "learning_rate": 9.56176729097149e-06, "loss": 0.8726, "step": 4537 }, { "epoch": 0.160290345226479, "grad_norm": 1.8047130107879639, "learning_rate": 9.561533078314876e-06, "loss": 0.8564, "step": 4538 }, { "epoch": 0.16032566703018689, "grad_norm": 1.7897396087646484, "learning_rate": 9.561298805957774e-06, "loss": 0.8566, "step": 4539 }, { "epoch": 0.1603609888338948, "grad_norm": 2.124241352081299, "learning_rate": 9.561064473903252e-06, "loss": 0.8837, "step": 4540 }, { "epoch": 0.1603963106376027, "grad_norm": 1.9531922340393066, "learning_rate": 9.560830082154375e-06, "loss": 0.8701, "step": 4541 }, { "epoch": 0.16043163244131062, "grad_norm": 1.969892144203186, "learning_rate": 9.56059563071421e-06, "loss": 0.8914, "step": 4542 }, { "epoch": 0.16046695424501853, "grad_norm": 1.6985328197479248, "learning_rate": 9.560361119585828e-06, "loss": 0.8766, "step": 4543 }, { "epoch": 0.16050227604872644, "grad_norm": 1.8133196830749512, "learning_rate": 9.560126548772297e-06, "loss": 0.8439, "step": 4544 }, { "epoch": 0.16053759785243432, "grad_norm": 1.5982722043991089, "learning_rate": 9.559891918276685e-06, "loss": 0.8535, "step": 4545 }, { "epoch": 0.16057291965614223, "grad_norm": 1.9188026189804077, "learning_rate": 9.559657228102068e-06, "loss": 0.895, "step": 4546 }, { "epoch": 0.16060824145985014, "grad_norm": 1.6847268342971802, "learning_rate": 9.559422478251513e-06, "loss": 0.8474, "step": 4547 }, { "epoch": 0.16064356326355805, "grad_norm": 1.7039889097213745, "learning_rate": 9.559187668728091e-06, "loss": 0.8738, "step": 4548 }, { "epoch": 0.16067888506726596, "grad_norm": 1.653676152229309, "learning_rate": 9.55895279953488e-06, "loss": 0.9247, "step": 4549 }, { "epoch": 0.16071420687097387, "grad_norm": 2.1172993183135986, "learning_rate": 9.558717870674953e-06, "loss": 0.8933, "step": 4550 }, { "epoch": 0.16074952867468179, "grad_norm": 1.7843716144561768, "learning_rate": 9.558482882151381e-06, "loss": 0.8546, "step": 4551 }, { "epoch": 0.16078485047838967, "grad_norm": 1.7206534147262573, "learning_rate": 9.558247833967245e-06, "loss": 0.8437, "step": 4552 }, { "epoch": 0.16082017228209758, "grad_norm": 1.7280266284942627, "learning_rate": 9.558012726125616e-06, "loss": 0.8478, "step": 4553 }, { "epoch": 0.1608554940858055, "grad_norm": 1.6714067459106445, "learning_rate": 9.557777558629573e-06, "loss": 0.8828, "step": 4554 }, { "epoch": 0.1608908158895134, "grad_norm": 1.85481595993042, "learning_rate": 9.557542331482195e-06, "loss": 0.8636, "step": 4555 }, { "epoch": 0.1609261376932213, "grad_norm": 1.1798051595687866, "learning_rate": 9.55730704468656e-06, "loss": 0.5998, "step": 4556 }, { "epoch": 0.16096145949692922, "grad_norm": 2.310108184814453, "learning_rate": 9.557071698245746e-06, "loss": 0.8814, "step": 4557 }, { "epoch": 0.1609967813006371, "grad_norm": 0.9865424633026123, "learning_rate": 9.556836292162834e-06, "loss": 0.5811, "step": 4558 }, { "epoch": 0.16103210310434501, "grad_norm": 2.1393356323242188, "learning_rate": 9.556600826440906e-06, "loss": 0.8845, "step": 4559 }, { "epoch": 0.16106742490805293, "grad_norm": 2.03715443611145, "learning_rate": 9.556365301083043e-06, "loss": 0.864, "step": 4560 }, { "epoch": 0.16110274671176084, "grad_norm": 1.1125379800796509, "learning_rate": 9.556129716092329e-06, "loss": 0.6005, "step": 4561 }, { "epoch": 0.16113806851546875, "grad_norm": 2.0812430381774902, "learning_rate": 9.555894071471844e-06, "loss": 0.8458, "step": 4562 }, { "epoch": 0.16117339031917666, "grad_norm": 3.2150609493255615, "learning_rate": 9.555658367224672e-06, "loss": 0.8908, "step": 4563 }, { "epoch": 0.16120871212288457, "grad_norm": 1.96006441116333, "learning_rate": 9.555422603353903e-06, "loss": 0.8847, "step": 4564 }, { "epoch": 0.16124403392659245, "grad_norm": 1.9073320627212524, "learning_rate": 9.555186779862618e-06, "loss": 0.9136, "step": 4565 }, { "epoch": 0.16127935573030036, "grad_norm": 1.8725337982177734, "learning_rate": 9.554950896753904e-06, "loss": 0.9083, "step": 4566 }, { "epoch": 0.16131467753400827, "grad_norm": 1.9298559427261353, "learning_rate": 9.554714954030849e-06, "loss": 0.86, "step": 4567 }, { "epoch": 0.16134999933771618, "grad_norm": 1.7614600658416748, "learning_rate": 9.554478951696542e-06, "loss": 0.8993, "step": 4568 }, { "epoch": 0.1613853211414241, "grad_norm": 1.7622590065002441, "learning_rate": 9.554242889754071e-06, "loss": 0.8587, "step": 4569 }, { "epoch": 0.161420642945132, "grad_norm": 2.020524740219116, "learning_rate": 9.554006768206523e-06, "loss": 0.8623, "step": 4570 }, { "epoch": 0.1614559647488399, "grad_norm": 2.0326650142669678, "learning_rate": 9.553770587056992e-06, "loss": 0.8497, "step": 4571 }, { "epoch": 0.1614912865525478, "grad_norm": 1.8329269886016846, "learning_rate": 9.553534346308566e-06, "loss": 0.903, "step": 4572 }, { "epoch": 0.1615266083562557, "grad_norm": 1.8125684261322021, "learning_rate": 9.553298045964341e-06, "loss": 0.8642, "step": 4573 }, { "epoch": 0.16156193015996362, "grad_norm": 1.7996762990951538, "learning_rate": 9.553061686027405e-06, "loss": 0.8746, "step": 4574 }, { "epoch": 0.16159725196367153, "grad_norm": 2.091294527053833, "learning_rate": 9.552825266500854e-06, "loss": 0.8358, "step": 4575 }, { "epoch": 0.16163257376737944, "grad_norm": 1.6661449670791626, "learning_rate": 9.552588787387782e-06, "loss": 0.8054, "step": 4576 }, { "epoch": 0.16166789557108735, "grad_norm": 1.6100058555603027, "learning_rate": 9.552352248691283e-06, "loss": 0.8459, "step": 4577 }, { "epoch": 0.16170321737479523, "grad_norm": 1.6765857934951782, "learning_rate": 9.552115650414456e-06, "loss": 0.8343, "step": 4578 }, { "epoch": 0.16173853917850314, "grad_norm": 1.9517937898635864, "learning_rate": 9.551878992560393e-06, "loss": 0.8681, "step": 4579 }, { "epoch": 0.16177386098221105, "grad_norm": 1.7837753295898438, "learning_rate": 9.551642275132193e-06, "loss": 0.8815, "step": 4580 }, { "epoch": 0.16180918278591896, "grad_norm": 1.8757884502410889, "learning_rate": 9.551405498132955e-06, "loss": 0.8623, "step": 4581 }, { "epoch": 0.16184450458962688, "grad_norm": 1.8249623775482178, "learning_rate": 9.551168661565778e-06, "loss": 0.8669, "step": 4582 }, { "epoch": 0.16187982639333479, "grad_norm": 1.79899263381958, "learning_rate": 9.55093176543376e-06, "loss": 0.8607, "step": 4583 }, { "epoch": 0.16191514819704267, "grad_norm": 1.674225091934204, "learning_rate": 9.550694809740004e-06, "loss": 0.8698, "step": 4584 }, { "epoch": 0.16195047000075058, "grad_norm": 1.9229363203048706, "learning_rate": 9.55045779448761e-06, "loss": 0.8681, "step": 4585 }, { "epoch": 0.1619857918044585, "grad_norm": 1.6407828330993652, "learning_rate": 9.550220719679679e-06, "loss": 0.8839, "step": 4586 }, { "epoch": 0.1620211136081664, "grad_norm": 1.6473538875579834, "learning_rate": 9.549983585319313e-06, "loss": 0.8493, "step": 4587 }, { "epoch": 0.1620564354118743, "grad_norm": 1.6379705667495728, "learning_rate": 9.549746391409619e-06, "loss": 0.8414, "step": 4588 }, { "epoch": 0.16209175721558222, "grad_norm": 1.7644575834274292, "learning_rate": 9.549509137953698e-06, "loss": 0.9179, "step": 4589 }, { "epoch": 0.16212707901929013, "grad_norm": 1.9736213684082031, "learning_rate": 9.549271824954658e-06, "loss": 0.9093, "step": 4590 }, { "epoch": 0.16216240082299802, "grad_norm": 1.8113322257995605, "learning_rate": 9.549034452415604e-06, "loss": 0.8619, "step": 4591 }, { "epoch": 0.16219772262670593, "grad_norm": 1.7272863388061523, "learning_rate": 9.548797020339642e-06, "loss": 0.8564, "step": 4592 }, { "epoch": 0.16223304443041384, "grad_norm": 1.5998895168304443, "learning_rate": 9.548559528729878e-06, "loss": 0.8542, "step": 4593 }, { "epoch": 0.16226836623412175, "grad_norm": 1.8904039859771729, "learning_rate": 9.548321977589422e-06, "loss": 0.8896, "step": 4594 }, { "epoch": 0.16230368803782966, "grad_norm": 1.7248289585113525, "learning_rate": 9.548084366921385e-06, "loss": 0.855, "step": 4595 }, { "epoch": 0.16233900984153757, "grad_norm": 3.633201837539673, "learning_rate": 9.547846696728873e-06, "loss": 0.8739, "step": 4596 }, { "epoch": 0.16237433164524545, "grad_norm": 1.9260865449905396, "learning_rate": 9.547608967015e-06, "loss": 0.8675, "step": 4597 }, { "epoch": 0.16240965344895336, "grad_norm": 1.8245186805725098, "learning_rate": 9.547371177782875e-06, "loss": 0.8735, "step": 4598 }, { "epoch": 0.16244497525266127, "grad_norm": 1.7844393253326416, "learning_rate": 9.547133329035609e-06, "loss": 0.8509, "step": 4599 }, { "epoch": 0.16248029705636918, "grad_norm": 1.6748969554901123, "learning_rate": 9.546895420776318e-06, "loss": 0.8475, "step": 4600 }, { "epoch": 0.1625156188600771, "grad_norm": 1.8694995641708374, "learning_rate": 9.546657453008116e-06, "loss": 0.8735, "step": 4601 }, { "epoch": 0.162550940663785, "grad_norm": 1.8914713859558105, "learning_rate": 9.546419425734115e-06, "loss": 0.8678, "step": 4602 }, { "epoch": 0.16258626246749291, "grad_norm": 1.7216806411743164, "learning_rate": 9.54618133895743e-06, "loss": 0.8793, "step": 4603 }, { "epoch": 0.1626215842712008, "grad_norm": 1.8474552631378174, "learning_rate": 9.54594319268118e-06, "loss": 0.8871, "step": 4604 }, { "epoch": 0.1626569060749087, "grad_norm": 1.8173166513442993, "learning_rate": 9.54570498690848e-06, "loss": 0.8603, "step": 4605 }, { "epoch": 0.16269222787861662, "grad_norm": 1.7051523923873901, "learning_rate": 9.545466721642446e-06, "loss": 0.923, "step": 4606 }, { "epoch": 0.16272754968232453, "grad_norm": 1.7422534227371216, "learning_rate": 9.5452283968862e-06, "loss": 0.8612, "step": 4607 }, { "epoch": 0.16276287148603244, "grad_norm": 1.5336092710494995, "learning_rate": 9.544990012642856e-06, "loss": 0.8647, "step": 4608 }, { "epoch": 0.16279819328974035, "grad_norm": 1.7565793991088867, "learning_rate": 9.54475156891554e-06, "loss": 0.8434, "step": 4609 }, { "epoch": 0.16283351509344823, "grad_norm": 1.7360292673110962, "learning_rate": 9.544513065707368e-06, "loss": 0.884, "step": 4610 }, { "epoch": 0.16286883689715614, "grad_norm": 1.7067102193832397, "learning_rate": 9.544274503021464e-06, "loss": 0.8575, "step": 4611 }, { "epoch": 0.16290415870086405, "grad_norm": 1.8699257373809814, "learning_rate": 9.54403588086095e-06, "loss": 0.8643, "step": 4612 }, { "epoch": 0.16293948050457197, "grad_norm": 1.752168893814087, "learning_rate": 9.543797199228948e-06, "loss": 0.8648, "step": 4613 }, { "epoch": 0.16297480230827988, "grad_norm": 1.7933167219161987, "learning_rate": 9.543558458128584e-06, "loss": 0.8514, "step": 4614 }, { "epoch": 0.1630101241119878, "grad_norm": 1.7026429176330566, "learning_rate": 9.543319657562979e-06, "loss": 0.8558, "step": 4615 }, { "epoch": 0.1630454459156957, "grad_norm": 1.6999987363815308, "learning_rate": 9.54308079753526e-06, "loss": 0.8575, "step": 4616 }, { "epoch": 0.16308076771940358, "grad_norm": 1.905109167098999, "learning_rate": 9.542841878048554e-06, "loss": 0.8465, "step": 4617 }, { "epoch": 0.1631160895231115, "grad_norm": 1.6554551124572754, "learning_rate": 9.542602899105988e-06, "loss": 0.8597, "step": 4618 }, { "epoch": 0.1631514113268194, "grad_norm": 1.7454365491867065, "learning_rate": 9.542363860710689e-06, "loss": 0.9013, "step": 4619 }, { "epoch": 0.1631867331305273, "grad_norm": 1.9803364276885986, "learning_rate": 9.542124762865786e-06, "loss": 0.863, "step": 4620 }, { "epoch": 0.16322205493423522, "grad_norm": 2.140927314758301, "learning_rate": 9.541885605574408e-06, "loss": 0.8827, "step": 4621 }, { "epoch": 0.16325737673794313, "grad_norm": 1.966282606124878, "learning_rate": 9.541646388839684e-06, "loss": 0.8706, "step": 4622 }, { "epoch": 0.16329269854165102, "grad_norm": 1.6857634782791138, "learning_rate": 9.541407112664747e-06, "loss": 0.8818, "step": 4623 }, { "epoch": 0.16332802034535893, "grad_norm": 4.45681619644165, "learning_rate": 9.541167777052728e-06, "loss": 0.8658, "step": 4624 }, { "epoch": 0.16336334214906684, "grad_norm": 1.6503244638442993, "learning_rate": 9.540928382006757e-06, "loss": 0.8556, "step": 4625 }, { "epoch": 0.16339866395277475, "grad_norm": 1.8185936212539673, "learning_rate": 9.540688927529968e-06, "loss": 0.9056, "step": 4626 }, { "epoch": 0.16343398575648266, "grad_norm": 1.6765835285186768, "learning_rate": 9.540449413625499e-06, "loss": 0.8499, "step": 4627 }, { "epoch": 0.16346930756019057, "grad_norm": 1.8170770406723022, "learning_rate": 9.54020984029648e-06, "loss": 0.8713, "step": 4628 }, { "epoch": 0.16350462936389848, "grad_norm": 1.7820658683776855, "learning_rate": 9.539970207546048e-06, "loss": 0.8875, "step": 4629 }, { "epoch": 0.16353995116760636, "grad_norm": 1.8329006433486938, "learning_rate": 9.539730515377339e-06, "loss": 0.8691, "step": 4630 }, { "epoch": 0.16357527297131427, "grad_norm": 3.7660887241363525, "learning_rate": 9.539490763793492e-06, "loss": 0.866, "step": 4631 }, { "epoch": 0.16361059477502218, "grad_norm": 1.7486858367919922, "learning_rate": 9.53925095279764e-06, "loss": 0.8788, "step": 4632 }, { "epoch": 0.1636459165787301, "grad_norm": 1.9219896793365479, "learning_rate": 9.539011082392929e-06, "loss": 0.8624, "step": 4633 }, { "epoch": 0.163681238382438, "grad_norm": 1.6824442148208618, "learning_rate": 9.53877115258249e-06, "loss": 0.835, "step": 4634 }, { "epoch": 0.16371656018614592, "grad_norm": 1.7871854305267334, "learning_rate": 9.53853116336947e-06, "loss": 0.8745, "step": 4635 }, { "epoch": 0.1637518819898538, "grad_norm": 2.3882486820220947, "learning_rate": 9.538291114757008e-06, "loss": 0.8529, "step": 4636 }, { "epoch": 0.1637872037935617, "grad_norm": 1.976938009262085, "learning_rate": 9.538051006748244e-06, "loss": 0.8882, "step": 4637 }, { "epoch": 0.16382252559726962, "grad_norm": 1.6718236207962036, "learning_rate": 9.53781083934632e-06, "loss": 0.8054, "step": 4638 }, { "epoch": 0.16385784740097753, "grad_norm": 1.632684350013733, "learning_rate": 9.537570612554383e-06, "loss": 0.8251, "step": 4639 }, { "epoch": 0.16389316920468544, "grad_norm": 1.6651562452316284, "learning_rate": 9.537330326375574e-06, "loss": 0.8577, "step": 4640 }, { "epoch": 0.16392849100839335, "grad_norm": 1.6561359167099, "learning_rate": 9.537089980813037e-06, "loss": 0.8251, "step": 4641 }, { "epoch": 0.16396381281210126, "grad_norm": 1.3153862953186035, "learning_rate": 9.53684957586992e-06, "loss": 0.6761, "step": 4642 }, { "epoch": 0.16399913461580914, "grad_norm": 1.7686611413955688, "learning_rate": 9.536609111549371e-06, "loss": 0.8944, "step": 4643 }, { "epoch": 0.16403445641951706, "grad_norm": 1.7156628370285034, "learning_rate": 9.536368587854531e-06, "loss": 0.8679, "step": 4644 }, { "epoch": 0.16406977822322497, "grad_norm": 1.8648769855499268, "learning_rate": 9.536128004788553e-06, "loss": 0.8515, "step": 4645 }, { "epoch": 0.16410510002693288, "grad_norm": 1.9319653511047363, "learning_rate": 9.535887362354587e-06, "loss": 0.8278, "step": 4646 }, { "epoch": 0.1641404218306408, "grad_norm": 1.8362427949905396, "learning_rate": 9.535646660555777e-06, "loss": 0.8831, "step": 4647 }, { "epoch": 0.1641757436343487, "grad_norm": 1.7278321981430054, "learning_rate": 9.535405899395278e-06, "loss": 0.8709, "step": 4648 }, { "epoch": 0.1642110654380566, "grad_norm": 1.6102410554885864, "learning_rate": 9.535165078876237e-06, "loss": 0.8602, "step": 4649 }, { "epoch": 0.1642463872417645, "grad_norm": 1.7569594383239746, "learning_rate": 9.534924199001809e-06, "loss": 0.8746, "step": 4650 }, { "epoch": 0.1642817090454724, "grad_norm": 1.7398799657821655, "learning_rate": 9.534683259775146e-06, "loss": 0.8369, "step": 4651 }, { "epoch": 0.1643170308491803, "grad_norm": 1.6096149682998657, "learning_rate": 9.5344422611994e-06, "loss": 0.8842, "step": 4652 }, { "epoch": 0.16435235265288822, "grad_norm": 1.7566739320755005, "learning_rate": 9.534201203277726e-06, "loss": 0.8828, "step": 4653 }, { "epoch": 0.16438767445659613, "grad_norm": 1.575818657875061, "learning_rate": 9.533960086013281e-06, "loss": 0.8504, "step": 4654 }, { "epoch": 0.16442299626030404, "grad_norm": 1.90034019947052, "learning_rate": 9.533718909409217e-06, "loss": 0.8935, "step": 4655 }, { "epoch": 0.16445831806401193, "grad_norm": 1.7602804899215698, "learning_rate": 9.533477673468691e-06, "loss": 0.8717, "step": 4656 }, { "epoch": 0.16449363986771984, "grad_norm": 2.3617193698883057, "learning_rate": 9.533236378194863e-06, "loss": 0.8812, "step": 4657 }, { "epoch": 0.16452896167142775, "grad_norm": 1.849642276763916, "learning_rate": 9.53299502359089e-06, "loss": 0.8555, "step": 4658 }, { "epoch": 0.16456428347513566, "grad_norm": 1.9937787055969238, "learning_rate": 9.532753609659929e-06, "loss": 0.8732, "step": 4659 }, { "epoch": 0.16459960527884357, "grad_norm": 1.673282265663147, "learning_rate": 9.53251213640514e-06, "loss": 0.8321, "step": 4660 }, { "epoch": 0.16463492708255148, "grad_norm": 1.68064284324646, "learning_rate": 9.532270603829688e-06, "loss": 0.8618, "step": 4661 }, { "epoch": 0.1646702488862594, "grad_norm": 1.71084463596344, "learning_rate": 9.532029011936728e-06, "loss": 0.8318, "step": 4662 }, { "epoch": 0.16470557068996727, "grad_norm": 1.801803708076477, "learning_rate": 9.531787360729423e-06, "loss": 0.8497, "step": 4663 }, { "epoch": 0.16474089249367518, "grad_norm": 1.8999170064926147, "learning_rate": 9.531545650210939e-06, "loss": 0.8722, "step": 4664 }, { "epoch": 0.1647762142973831, "grad_norm": 1.8810335397720337, "learning_rate": 9.531303880384435e-06, "loss": 0.8441, "step": 4665 }, { "epoch": 0.164811536101091, "grad_norm": 1.906029224395752, "learning_rate": 9.53106205125308e-06, "loss": 0.8411, "step": 4666 }, { "epoch": 0.16484685790479892, "grad_norm": 2.239203929901123, "learning_rate": 9.530820162820034e-06, "loss": 0.8701, "step": 4667 }, { "epoch": 0.16488217970850683, "grad_norm": 1.790255069732666, "learning_rate": 9.530578215088467e-06, "loss": 0.8343, "step": 4668 }, { "epoch": 0.1649175015122147, "grad_norm": 1.7603585720062256, "learning_rate": 9.530336208061544e-06, "loss": 0.8448, "step": 4669 }, { "epoch": 0.16495282331592262, "grad_norm": 1.7141916751861572, "learning_rate": 9.530094141742431e-06, "loss": 0.8714, "step": 4670 }, { "epoch": 0.16498814511963053, "grad_norm": 1.9211136102676392, "learning_rate": 9.5298520161343e-06, "loss": 0.8898, "step": 4671 }, { "epoch": 0.16502346692333844, "grad_norm": 1.9491918087005615, "learning_rate": 9.529609831240317e-06, "loss": 0.858, "step": 4672 }, { "epoch": 0.16505878872704635, "grad_norm": 1.7915791273117065, "learning_rate": 9.52936758706365e-06, "loss": 0.8556, "step": 4673 }, { "epoch": 0.16509411053075426, "grad_norm": 1.9555485248565674, "learning_rate": 9.529125283607473e-06, "loss": 0.8153, "step": 4674 }, { "epoch": 0.16512943233446217, "grad_norm": 1.8348472118377686, "learning_rate": 9.528882920874954e-06, "loss": 0.8422, "step": 4675 }, { "epoch": 0.16516475413817006, "grad_norm": 1.732576847076416, "learning_rate": 9.528640498869268e-06, "loss": 0.8884, "step": 4676 }, { "epoch": 0.16520007594187797, "grad_norm": 1.8993563652038574, "learning_rate": 9.528398017593587e-06, "loss": 0.8883, "step": 4677 }, { "epoch": 0.16523539774558588, "grad_norm": 1.7528234720230103, "learning_rate": 9.528155477051084e-06, "loss": 0.8262, "step": 4678 }, { "epoch": 0.1652707195492938, "grad_norm": 2.2836413383483887, "learning_rate": 9.527912877244932e-06, "loss": 0.8892, "step": 4679 }, { "epoch": 0.1653060413530017, "grad_norm": 1.7964714765548706, "learning_rate": 9.527670218178308e-06, "loss": 0.8533, "step": 4680 }, { "epoch": 0.1653413631567096, "grad_norm": 1.8594005107879639, "learning_rate": 9.527427499854386e-06, "loss": 0.856, "step": 4681 }, { "epoch": 0.1653766849604175, "grad_norm": 1.7510066032409668, "learning_rate": 9.527184722276346e-06, "loss": 0.8677, "step": 4682 }, { "epoch": 0.1654120067641254, "grad_norm": 1.7875373363494873, "learning_rate": 9.526941885447363e-06, "loss": 0.8823, "step": 4683 }, { "epoch": 0.1654473285678333, "grad_norm": 1.1908447742462158, "learning_rate": 9.526698989370614e-06, "loss": 0.6221, "step": 4684 }, { "epoch": 0.16548265037154122, "grad_norm": 1.8377718925476074, "learning_rate": 9.52645603404928e-06, "loss": 0.87, "step": 4685 }, { "epoch": 0.16551797217524913, "grad_norm": 1.9014291763305664, "learning_rate": 9.526213019486543e-06, "loss": 0.8638, "step": 4686 }, { "epoch": 0.16555329397895704, "grad_norm": 1.768385887145996, "learning_rate": 9.525969945685579e-06, "loss": 0.8502, "step": 4687 }, { "epoch": 0.16558861578266496, "grad_norm": 1.81208074092865, "learning_rate": 9.52572681264957e-06, "loss": 0.8965, "step": 4688 }, { "epoch": 0.16562393758637284, "grad_norm": 1.6085383892059326, "learning_rate": 9.5254836203817e-06, "loss": 0.8604, "step": 4689 }, { "epoch": 0.16565925939008075, "grad_norm": 1.6909314393997192, "learning_rate": 9.525240368885152e-06, "loss": 0.8548, "step": 4690 }, { "epoch": 0.16569458119378866, "grad_norm": 1.630415439605713, "learning_rate": 9.524997058163108e-06, "loss": 0.8447, "step": 4691 }, { "epoch": 0.16572990299749657, "grad_norm": 1.8863284587860107, "learning_rate": 9.524753688218753e-06, "loss": 0.8518, "step": 4692 }, { "epoch": 0.16576522480120448, "grad_norm": 1.8562209606170654, "learning_rate": 9.524510259055272e-06, "loss": 0.8745, "step": 4693 }, { "epoch": 0.1658005466049124, "grad_norm": 1.8393434286117554, "learning_rate": 9.524266770675853e-06, "loss": 0.8634, "step": 4694 }, { "epoch": 0.16583586840862027, "grad_norm": 1.6671748161315918, "learning_rate": 9.524023223083677e-06, "loss": 0.8679, "step": 4695 }, { "epoch": 0.16587119021232818, "grad_norm": 1.9222475290298462, "learning_rate": 9.523779616281939e-06, "loss": 0.906, "step": 4696 }, { "epoch": 0.1659065120160361, "grad_norm": 1.8889226913452148, "learning_rate": 9.523535950273823e-06, "loss": 0.8637, "step": 4697 }, { "epoch": 0.165941833819744, "grad_norm": 1.9428578615188599, "learning_rate": 9.523292225062518e-06, "loss": 0.8719, "step": 4698 }, { "epoch": 0.16597715562345192, "grad_norm": 1.6513285636901855, "learning_rate": 9.523048440651216e-06, "loss": 0.8925, "step": 4699 }, { "epoch": 0.16601247742715983, "grad_norm": 1.707615852355957, "learning_rate": 9.522804597043105e-06, "loss": 0.8883, "step": 4700 }, { "epoch": 0.16604779923086774, "grad_norm": 1.8328862190246582, "learning_rate": 9.522560694241378e-06, "loss": 0.8741, "step": 4701 }, { "epoch": 0.16608312103457562, "grad_norm": 2.0761008262634277, "learning_rate": 9.522316732249226e-06, "loss": 0.8715, "step": 4702 }, { "epoch": 0.16611844283828353, "grad_norm": 2.767709970474243, "learning_rate": 9.522072711069846e-06, "loss": 0.8566, "step": 4703 }, { "epoch": 0.16615376464199144, "grad_norm": 1.6421457529067993, "learning_rate": 9.521828630706424e-06, "loss": 0.8517, "step": 4704 }, { "epoch": 0.16618908644569935, "grad_norm": 1.7545578479766846, "learning_rate": 9.521584491162161e-06, "loss": 0.8564, "step": 4705 }, { "epoch": 0.16622440824940726, "grad_norm": 1.7884340286254883, "learning_rate": 9.52134029244025e-06, "loss": 0.8287, "step": 4706 }, { "epoch": 0.16625973005311517, "grad_norm": 1.8560702800750732, "learning_rate": 9.52109603454389e-06, "loss": 0.9211, "step": 4707 }, { "epoch": 0.16629505185682306, "grad_norm": 2.2874696254730225, "learning_rate": 9.520851717476271e-06, "loss": 0.8132, "step": 4708 }, { "epoch": 0.16633037366053097, "grad_norm": 3.6933071613311768, "learning_rate": 9.520607341240595e-06, "loss": 0.8741, "step": 4709 }, { "epoch": 0.16636569546423888, "grad_norm": 2.0798189640045166, "learning_rate": 9.520362905840062e-06, "loss": 0.8511, "step": 4710 }, { "epoch": 0.1664010172679468, "grad_norm": 1.8383816480636597, "learning_rate": 9.520118411277867e-06, "loss": 0.839, "step": 4711 }, { "epoch": 0.1664363390716547, "grad_norm": 1.7867660522460938, "learning_rate": 9.519873857557213e-06, "loss": 0.8345, "step": 4712 }, { "epoch": 0.1664716608753626, "grad_norm": 1.6657110452651978, "learning_rate": 9.519629244681298e-06, "loss": 0.8508, "step": 4713 }, { "epoch": 0.16650698267907052, "grad_norm": 1.8233169317245483, "learning_rate": 9.519384572653328e-06, "loss": 0.8452, "step": 4714 }, { "epoch": 0.1665423044827784, "grad_norm": 1.2944839000701904, "learning_rate": 9.5191398414765e-06, "loss": 0.6145, "step": 4715 }, { "epoch": 0.1665776262864863, "grad_norm": 1.6857032775878906, "learning_rate": 9.51889505115402e-06, "loss": 0.8798, "step": 4716 }, { "epoch": 0.16661294809019422, "grad_norm": 1.7872271537780762, "learning_rate": 9.518650201689091e-06, "loss": 0.8667, "step": 4717 }, { "epoch": 0.16664826989390213, "grad_norm": 1.9354182481765747, "learning_rate": 9.518405293084917e-06, "loss": 0.8531, "step": 4718 }, { "epoch": 0.16668359169761005, "grad_norm": 1.7909445762634277, "learning_rate": 9.518160325344703e-06, "loss": 0.8791, "step": 4719 }, { "epoch": 0.16671891350131796, "grad_norm": 1.7309879064559937, "learning_rate": 9.517915298471658e-06, "loss": 0.8702, "step": 4720 }, { "epoch": 0.16675423530502584, "grad_norm": 1.7309458255767822, "learning_rate": 9.517670212468986e-06, "loss": 0.847, "step": 4721 }, { "epoch": 0.16678955710873375, "grad_norm": 1.8617337942123413, "learning_rate": 9.517425067339897e-06, "loss": 0.8626, "step": 4722 }, { "epoch": 0.16682487891244166, "grad_norm": 1.7913892269134521, "learning_rate": 9.517179863087596e-06, "loss": 0.8563, "step": 4723 }, { "epoch": 0.16686020071614957, "grad_norm": 1.9747421741485596, "learning_rate": 9.516934599715293e-06, "loss": 0.8696, "step": 4724 }, { "epoch": 0.16689552251985748, "grad_norm": 1.7313075065612793, "learning_rate": 9.5166892772262e-06, "loss": 0.8562, "step": 4725 }, { "epoch": 0.1669308443235654, "grad_norm": 1.7867660522460938, "learning_rate": 9.516443895623528e-06, "loss": 0.8339, "step": 4726 }, { "epoch": 0.1669661661272733, "grad_norm": 1.6678837537765503, "learning_rate": 9.516198454910488e-06, "loss": 0.8676, "step": 4727 }, { "epoch": 0.16700148793098119, "grad_norm": 1.8718799352645874, "learning_rate": 9.515952955090291e-06, "loss": 0.8658, "step": 4728 }, { "epoch": 0.1670368097346891, "grad_norm": 1.7966276407241821, "learning_rate": 9.51570739616615e-06, "loss": 0.8609, "step": 4729 }, { "epoch": 0.167072131538397, "grad_norm": 1.7710180282592773, "learning_rate": 9.515461778141278e-06, "loss": 0.85, "step": 4730 }, { "epoch": 0.16710745334210492, "grad_norm": 1.7213553190231323, "learning_rate": 9.515216101018893e-06, "loss": 0.883, "step": 4731 }, { "epoch": 0.16714277514581283, "grad_norm": 1.7095293998718262, "learning_rate": 9.514970364802207e-06, "loss": 0.8966, "step": 4732 }, { "epoch": 0.16717809694952074, "grad_norm": 1.8083316087722778, "learning_rate": 9.51472456949444e-06, "loss": 0.8505, "step": 4733 }, { "epoch": 0.16721341875322862, "grad_norm": 2.371424913406372, "learning_rate": 9.514478715098806e-06, "loss": 0.9184, "step": 4734 }, { "epoch": 0.16724874055693653, "grad_norm": 2.0326993465423584, "learning_rate": 9.51423280161852e-06, "loss": 0.9103, "step": 4735 }, { "epoch": 0.16728406236064444, "grad_norm": 1.8931570053100586, "learning_rate": 9.513986829056807e-06, "loss": 0.9229, "step": 4736 }, { "epoch": 0.16731938416435235, "grad_norm": 1.8478376865386963, "learning_rate": 9.513740797416882e-06, "loss": 0.8709, "step": 4737 }, { "epoch": 0.16735470596806026, "grad_norm": 1.7154197692871094, "learning_rate": 9.513494706701965e-06, "loss": 0.8894, "step": 4738 }, { "epoch": 0.16739002777176817, "grad_norm": 2.130053997039795, "learning_rate": 9.513248556915278e-06, "loss": 0.8767, "step": 4739 }, { "epoch": 0.16742534957547608, "grad_norm": 1.6215472221374512, "learning_rate": 9.513002348060043e-06, "loss": 0.8384, "step": 4740 }, { "epoch": 0.16746067137918397, "grad_norm": 2.0349228382110596, "learning_rate": 9.51275608013948e-06, "loss": 0.8729, "step": 4741 }, { "epoch": 0.16749599318289188, "grad_norm": 1.6863185167312622, "learning_rate": 9.512509753156816e-06, "loss": 0.859, "step": 4742 }, { "epoch": 0.1675313149865998, "grad_norm": 1.9081480503082275, "learning_rate": 9.512263367115271e-06, "loss": 0.8381, "step": 4743 }, { "epoch": 0.1675666367903077, "grad_norm": 1.7300604581832886, "learning_rate": 9.512016922018072e-06, "loss": 0.8589, "step": 4744 }, { "epoch": 0.1676019585940156, "grad_norm": 1.6659456491470337, "learning_rate": 9.511770417868445e-06, "loss": 0.8403, "step": 4745 }, { "epoch": 0.16763728039772352, "grad_norm": 1.7780554294586182, "learning_rate": 9.511523854669613e-06, "loss": 0.8777, "step": 4746 }, { "epoch": 0.1676726022014314, "grad_norm": 1.7113784551620483, "learning_rate": 9.511277232424804e-06, "loss": 0.8607, "step": 4747 }, { "epoch": 0.1677079240051393, "grad_norm": 1.680737853050232, "learning_rate": 9.51103055113725e-06, "loss": 0.8825, "step": 4748 }, { "epoch": 0.16774324580884722, "grad_norm": 1.7648967504501343, "learning_rate": 9.510783810810173e-06, "loss": 0.8675, "step": 4749 }, { "epoch": 0.16777856761255514, "grad_norm": 1.8555583953857422, "learning_rate": 9.510537011446808e-06, "loss": 0.8965, "step": 4750 }, { "epoch": 0.16781388941626305, "grad_norm": 1.6336252689361572, "learning_rate": 9.51029015305038e-06, "loss": 0.8367, "step": 4751 }, { "epoch": 0.16784921121997096, "grad_norm": 1.7449257373809814, "learning_rate": 9.510043235624123e-06, "loss": 0.8483, "step": 4752 }, { "epoch": 0.16788453302367887, "grad_norm": 1.7439273595809937, "learning_rate": 9.50979625917127e-06, "loss": 0.8901, "step": 4753 }, { "epoch": 0.16791985482738675, "grad_norm": 1.6385027170181274, "learning_rate": 9.509549223695049e-06, "loss": 0.8389, "step": 4754 }, { "epoch": 0.16795517663109466, "grad_norm": 1.6732805967330933, "learning_rate": 9.509302129198697e-06, "loss": 0.8544, "step": 4755 }, { "epoch": 0.16799049843480257, "grad_norm": 1.9398382902145386, "learning_rate": 9.509054975685444e-06, "loss": 0.9074, "step": 4756 }, { "epoch": 0.16802582023851048, "grad_norm": 1.710801601409912, "learning_rate": 9.508807763158529e-06, "loss": 0.8665, "step": 4757 }, { "epoch": 0.1680611420422184, "grad_norm": 1.9628359079360962, "learning_rate": 9.508560491621185e-06, "loss": 0.8902, "step": 4758 }, { "epoch": 0.1680964638459263, "grad_norm": 2.0653388500213623, "learning_rate": 9.50831316107665e-06, "loss": 0.8689, "step": 4759 }, { "epoch": 0.16813178564963419, "grad_norm": 1.7658531665802002, "learning_rate": 9.508065771528156e-06, "loss": 0.8459, "step": 4760 }, { "epoch": 0.1681671074533421, "grad_norm": 2.0418190956115723, "learning_rate": 9.507818322978948e-06, "loss": 0.8478, "step": 4761 }, { "epoch": 0.16820242925705, "grad_norm": 1.7694125175476074, "learning_rate": 9.50757081543226e-06, "loss": 0.8437, "step": 4762 }, { "epoch": 0.16823775106075792, "grad_norm": 1.7680264711380005, "learning_rate": 9.507323248891334e-06, "loss": 0.8778, "step": 4763 }, { "epoch": 0.16827307286446583, "grad_norm": 1.7621654272079468, "learning_rate": 9.507075623359407e-06, "loss": 0.8685, "step": 4764 }, { "epoch": 0.16830839466817374, "grad_norm": 1.9227426052093506, "learning_rate": 9.506827938839722e-06, "loss": 0.8597, "step": 4765 }, { "epoch": 0.16834371647188165, "grad_norm": 1.6750093698501587, "learning_rate": 9.506580195335522e-06, "loss": 0.8724, "step": 4766 }, { "epoch": 0.16837903827558953, "grad_norm": 1.859567403793335, "learning_rate": 9.506332392850044e-06, "loss": 0.8577, "step": 4767 }, { "epoch": 0.16841436007929744, "grad_norm": 2.0052382946014404, "learning_rate": 9.506084531386536e-06, "loss": 0.8385, "step": 4768 }, { "epoch": 0.16844968188300535, "grad_norm": 1.9581917524337769, "learning_rate": 9.505836610948243e-06, "loss": 0.89, "step": 4769 }, { "epoch": 0.16848500368671326, "grad_norm": 1.9052997827529907, "learning_rate": 9.505588631538406e-06, "loss": 0.8696, "step": 4770 }, { "epoch": 0.16852032549042117, "grad_norm": 1.663680076599121, "learning_rate": 9.505340593160271e-06, "loss": 0.8618, "step": 4771 }, { "epoch": 0.16855564729412909, "grad_norm": 1.7810760736465454, "learning_rate": 9.505092495817085e-06, "loss": 0.8381, "step": 4772 }, { "epoch": 0.16859096909783697, "grad_norm": 1.8951268196105957, "learning_rate": 9.504844339512096e-06, "loss": 0.8311, "step": 4773 }, { "epoch": 0.16862629090154488, "grad_norm": 1.6577467918395996, "learning_rate": 9.504596124248552e-06, "loss": 0.8522, "step": 4774 }, { "epoch": 0.1686616127052528, "grad_norm": 1.6997169256210327, "learning_rate": 9.504347850029699e-06, "loss": 0.8953, "step": 4775 }, { "epoch": 0.1686969345089607, "grad_norm": 1.8278273344039917, "learning_rate": 9.50409951685879e-06, "loss": 0.8864, "step": 4776 }, { "epoch": 0.1687322563126686, "grad_norm": 1.2765905857086182, "learning_rate": 9.503851124739073e-06, "loss": 0.6202, "step": 4777 }, { "epoch": 0.16876757811637652, "grad_norm": 1.643698811531067, "learning_rate": 9.503602673673798e-06, "loss": 0.8632, "step": 4778 }, { "epoch": 0.16880289992008443, "grad_norm": 1.8579111099243164, "learning_rate": 9.503354163666217e-06, "loss": 0.8456, "step": 4779 }, { "epoch": 0.16883822172379231, "grad_norm": 1.8654184341430664, "learning_rate": 9.503105594719584e-06, "loss": 0.8872, "step": 4780 }, { "epoch": 0.16887354352750023, "grad_norm": 1.6527998447418213, "learning_rate": 9.502856966837152e-06, "loss": 0.8301, "step": 4781 }, { "epoch": 0.16890886533120814, "grad_norm": 1.9438709020614624, "learning_rate": 9.502608280022173e-06, "loss": 0.888, "step": 4782 }, { "epoch": 0.16894418713491605, "grad_norm": 1.8272979259490967, "learning_rate": 9.502359534277905e-06, "loss": 0.8355, "step": 4783 }, { "epoch": 0.16897950893862396, "grad_norm": 1.9617884159088135, "learning_rate": 9.502110729607602e-06, "loss": 0.8336, "step": 4784 }, { "epoch": 0.16901483074233187, "grad_norm": 1.5910011529922485, "learning_rate": 9.50186186601452e-06, "loss": 0.8381, "step": 4785 }, { "epoch": 0.16905015254603975, "grad_norm": 1.078162431716919, "learning_rate": 9.501612943501914e-06, "loss": 0.5849, "step": 4786 }, { "epoch": 0.16908547434974766, "grad_norm": 1.7735282182693481, "learning_rate": 9.501363962073046e-06, "loss": 0.8826, "step": 4787 }, { "epoch": 0.16912079615345557, "grad_norm": 1.9651767015457153, "learning_rate": 9.501114921731171e-06, "loss": 0.857, "step": 4788 }, { "epoch": 0.16915611795716348, "grad_norm": 1.829727292060852, "learning_rate": 9.500865822479553e-06, "loss": 0.8887, "step": 4789 }, { "epoch": 0.1691914397608714, "grad_norm": 1.7404943704605103, "learning_rate": 9.500616664321448e-06, "loss": 0.8856, "step": 4790 }, { "epoch": 0.1692267615645793, "grad_norm": 1.7850189208984375, "learning_rate": 9.500367447260116e-06, "loss": 0.8492, "step": 4791 }, { "epoch": 0.16926208336828721, "grad_norm": 1.6477512121200562, "learning_rate": 9.500118171298822e-06, "loss": 0.8964, "step": 4792 }, { "epoch": 0.1692974051719951, "grad_norm": 1.7931305170059204, "learning_rate": 9.499868836440827e-06, "loss": 0.9041, "step": 4793 }, { "epoch": 0.169332726975703, "grad_norm": 2.212148904800415, "learning_rate": 9.499619442689396e-06, "loss": 0.8252, "step": 4794 }, { "epoch": 0.16936804877941092, "grad_norm": 1.846766471862793, "learning_rate": 9.49936999004779e-06, "loss": 0.8908, "step": 4795 }, { "epoch": 0.16940337058311883, "grad_norm": 1.6013914346694946, "learning_rate": 9.499120478519276e-06, "loss": 0.8444, "step": 4796 }, { "epoch": 0.16943869238682674, "grad_norm": 1.6367926597595215, "learning_rate": 9.498870908107119e-06, "loss": 0.8188, "step": 4797 }, { "epoch": 0.16947401419053465, "grad_norm": 1.8077778816223145, "learning_rate": 9.498621278814586e-06, "loss": 0.848, "step": 4798 }, { "epoch": 0.16950933599424253, "grad_norm": 1.678701639175415, "learning_rate": 9.498371590644942e-06, "loss": 0.8795, "step": 4799 }, { "epoch": 0.16954465779795044, "grad_norm": 1.7143700122833252, "learning_rate": 9.498121843601458e-06, "loss": 0.8784, "step": 4800 }, { "epoch": 0.16957997960165835, "grad_norm": 1.9640263319015503, "learning_rate": 9.497872037687399e-06, "loss": 0.8835, "step": 4801 }, { "epoch": 0.16961530140536626, "grad_norm": 1.6915802955627441, "learning_rate": 9.497622172906035e-06, "loss": 0.8506, "step": 4802 }, { "epoch": 0.16965062320907418, "grad_norm": 1.6405073404312134, "learning_rate": 9.497372249260641e-06, "loss": 0.852, "step": 4803 }, { "epoch": 0.16968594501278209, "grad_norm": 1.6776584386825562, "learning_rate": 9.497122266754483e-06, "loss": 0.8394, "step": 4804 }, { "epoch": 0.16972126681649, "grad_norm": 1.8037128448486328, "learning_rate": 9.496872225390835e-06, "loss": 0.9072, "step": 4805 }, { "epoch": 0.16975658862019788, "grad_norm": 1.7687482833862305, "learning_rate": 9.496622125172967e-06, "loss": 0.8774, "step": 4806 }, { "epoch": 0.1697919104239058, "grad_norm": 1.5193324089050293, "learning_rate": 9.496371966104155e-06, "loss": 0.8279, "step": 4807 }, { "epoch": 0.1698272322276137, "grad_norm": 1.652915358543396, "learning_rate": 9.49612174818767e-06, "loss": 0.8409, "step": 4808 }, { "epoch": 0.1698625540313216, "grad_norm": 1.7000067234039307, "learning_rate": 9.495871471426789e-06, "loss": 0.8977, "step": 4809 }, { "epoch": 0.16989787583502952, "grad_norm": 1.6555256843566895, "learning_rate": 9.49562113582479e-06, "loss": 0.8813, "step": 4810 }, { "epoch": 0.16993319763873743, "grad_norm": 1.2516816854476929, "learning_rate": 9.495370741384944e-06, "loss": 0.6238, "step": 4811 }, { "epoch": 0.16996851944244531, "grad_norm": 1.7959530353546143, "learning_rate": 9.495120288110533e-06, "loss": 0.8732, "step": 4812 }, { "epoch": 0.17000384124615323, "grad_norm": 1.6188256740570068, "learning_rate": 9.494869776004831e-06, "loss": 0.8502, "step": 4813 }, { "epoch": 0.17003916304986114, "grad_norm": 1.5791897773742676, "learning_rate": 9.494619205071118e-06, "loss": 0.8593, "step": 4814 }, { "epoch": 0.17007448485356905, "grad_norm": 1.5307235717773438, "learning_rate": 9.494368575312676e-06, "loss": 0.8679, "step": 4815 }, { "epoch": 0.17010980665727696, "grad_norm": 13.981131553649902, "learning_rate": 9.494117886732781e-06, "loss": 0.8923, "step": 4816 }, { "epoch": 0.17014512846098487, "grad_norm": 1.8668497800827026, "learning_rate": 9.493867139334716e-06, "loss": 0.8675, "step": 4817 }, { "epoch": 0.17018045026469278, "grad_norm": 1.705970287322998, "learning_rate": 9.493616333121763e-06, "loss": 0.8667, "step": 4818 }, { "epoch": 0.17021577206840066, "grad_norm": 2.021613597869873, "learning_rate": 9.493365468097204e-06, "loss": 0.8594, "step": 4819 }, { "epoch": 0.17025109387210857, "grad_norm": 1.5951182842254639, "learning_rate": 9.493114544264324e-06, "loss": 0.8855, "step": 4820 }, { "epoch": 0.17028641567581648, "grad_norm": 1.9274669885635376, "learning_rate": 9.492863561626404e-06, "loss": 0.8689, "step": 4821 }, { "epoch": 0.1703217374795244, "grad_norm": 1.8623173236846924, "learning_rate": 9.49261252018673e-06, "loss": 0.8694, "step": 4822 }, { "epoch": 0.1703570592832323, "grad_norm": 1.7523019313812256, "learning_rate": 9.492361419948589e-06, "loss": 0.8891, "step": 4823 }, { "epoch": 0.17039238108694021, "grad_norm": 1.9049162864685059, "learning_rate": 9.492110260915265e-06, "loss": 0.882, "step": 4824 }, { "epoch": 0.1704277028906481, "grad_norm": 1.8709149360656738, "learning_rate": 9.491859043090046e-06, "loss": 0.8776, "step": 4825 }, { "epoch": 0.170463024694356, "grad_norm": 1.8601011037826538, "learning_rate": 9.491607766476221e-06, "loss": 0.8368, "step": 4826 }, { "epoch": 0.17049834649806392, "grad_norm": 1.7956126928329468, "learning_rate": 9.491356431077078e-06, "loss": 0.8699, "step": 4827 }, { "epoch": 0.17053366830177183, "grad_norm": 1.6212687492370605, "learning_rate": 9.491105036895906e-06, "loss": 0.8703, "step": 4828 }, { "epoch": 0.17056899010547974, "grad_norm": 1.8325358629226685, "learning_rate": 9.490853583935995e-06, "loss": 0.8293, "step": 4829 }, { "epoch": 0.17060431190918765, "grad_norm": 1.8000547885894775, "learning_rate": 9.490602072200637e-06, "loss": 0.8664, "step": 4830 }, { "epoch": 0.17063963371289556, "grad_norm": 1.8756375312805176, "learning_rate": 9.490350501693123e-06, "loss": 0.8128, "step": 4831 }, { "epoch": 0.17067495551660344, "grad_norm": 1.7905750274658203, "learning_rate": 9.490098872416746e-06, "loss": 0.8846, "step": 4832 }, { "epoch": 0.17071027732031135, "grad_norm": 2.112010955810547, "learning_rate": 9.4898471843748e-06, "loss": 0.8749, "step": 4833 }, { "epoch": 0.17074559912401926, "grad_norm": 2.063351631164551, "learning_rate": 9.489595437570578e-06, "loss": 0.8829, "step": 4834 }, { "epoch": 0.17078092092772718, "grad_norm": 1.6912747621536255, "learning_rate": 9.489343632007372e-06, "loss": 0.8668, "step": 4835 }, { "epoch": 0.1708162427314351, "grad_norm": 1.755386471748352, "learning_rate": 9.489091767688483e-06, "loss": 0.8455, "step": 4836 }, { "epoch": 0.170851564535143, "grad_norm": 1.8187464475631714, "learning_rate": 9.488839844617206e-06, "loss": 0.8462, "step": 4837 }, { "epoch": 0.17088688633885088, "grad_norm": 1.7853142023086548, "learning_rate": 9.488587862796834e-06, "loss": 0.8627, "step": 4838 }, { "epoch": 0.1709222081425588, "grad_norm": 1.7202582359313965, "learning_rate": 9.48833582223067e-06, "loss": 0.8956, "step": 4839 }, { "epoch": 0.1709575299462667, "grad_norm": 1.7662321329116821, "learning_rate": 9.488083722922009e-06, "loss": 0.9115, "step": 4840 }, { "epoch": 0.1709928517499746, "grad_norm": 1.866338849067688, "learning_rate": 9.487831564874152e-06, "loss": 0.8586, "step": 4841 }, { "epoch": 0.17102817355368252, "grad_norm": 7.658580780029297, "learning_rate": 9.4875793480904e-06, "loss": 0.8441, "step": 4842 }, { "epoch": 0.17106349535739043, "grad_norm": 1.9667670726776123, "learning_rate": 9.487327072574053e-06, "loss": 0.8328, "step": 4843 }, { "epoch": 0.17109881716109834, "grad_norm": 2.0305075645446777, "learning_rate": 9.487074738328413e-06, "loss": 0.8725, "step": 4844 }, { "epoch": 0.17113413896480623, "grad_norm": 1.939949631690979, "learning_rate": 9.486822345356782e-06, "loss": 0.8768, "step": 4845 }, { "epoch": 0.17116946076851414, "grad_norm": 1.821601390838623, "learning_rate": 9.486569893662462e-06, "loss": 0.8824, "step": 4846 }, { "epoch": 0.17120478257222205, "grad_norm": 1.7195342779159546, "learning_rate": 9.48631738324876e-06, "loss": 0.8651, "step": 4847 }, { "epoch": 0.17124010437592996, "grad_norm": 1.9458352327346802, "learning_rate": 9.486064814118979e-06, "loss": 0.8618, "step": 4848 }, { "epoch": 0.17127542617963787, "grad_norm": 2.062504529953003, "learning_rate": 9.485812186276424e-06, "loss": 0.8831, "step": 4849 }, { "epoch": 0.17131074798334578, "grad_norm": 1.8546617031097412, "learning_rate": 9.485559499724405e-06, "loss": 0.8459, "step": 4850 }, { "epoch": 0.1713460697870537, "grad_norm": 1.9844456911087036, "learning_rate": 9.485306754466224e-06, "loss": 0.8837, "step": 4851 }, { "epoch": 0.17138139159076157, "grad_norm": 2.1566710472106934, "learning_rate": 9.485053950505193e-06, "loss": 0.8511, "step": 4852 }, { "epoch": 0.17141671339446948, "grad_norm": 1.7137092351913452, "learning_rate": 9.484801087844619e-06, "loss": 0.8182, "step": 4853 }, { "epoch": 0.1714520351981774, "grad_norm": 1.7794742584228516, "learning_rate": 9.48454816648781e-06, "loss": 0.8792, "step": 4854 }, { "epoch": 0.1714873570018853, "grad_norm": 2.5298242568969727, "learning_rate": 9.484295186438079e-06, "loss": 0.8858, "step": 4855 }, { "epoch": 0.17152267880559322, "grad_norm": 1.8050330877304077, "learning_rate": 9.484042147698735e-06, "loss": 0.8727, "step": 4856 }, { "epoch": 0.17155800060930113, "grad_norm": 1.7426815032958984, "learning_rate": 9.483789050273088e-06, "loss": 0.8801, "step": 4857 }, { "epoch": 0.171593322413009, "grad_norm": 1.7419917583465576, "learning_rate": 9.483535894164458e-06, "loss": 0.8573, "step": 4858 }, { "epoch": 0.17162864421671692, "grad_norm": 1.5630124807357788, "learning_rate": 9.483282679376149e-06, "loss": 0.8572, "step": 4859 }, { "epoch": 0.17166396602042483, "grad_norm": 1.8763906955718994, "learning_rate": 9.48302940591148e-06, "loss": 0.9024, "step": 4860 }, { "epoch": 0.17169928782413274, "grad_norm": 1.877541184425354, "learning_rate": 9.482776073773765e-06, "loss": 0.8715, "step": 4861 }, { "epoch": 0.17173460962784065, "grad_norm": 1.7944512367248535, "learning_rate": 9.482522682966319e-06, "loss": 0.8769, "step": 4862 }, { "epoch": 0.17176993143154856, "grad_norm": 1.917576789855957, "learning_rate": 9.48226923349246e-06, "loss": 0.8668, "step": 4863 }, { "epoch": 0.17180525323525647, "grad_norm": 1.867929458618164, "learning_rate": 9.482015725355502e-06, "loss": 0.8924, "step": 4864 }, { "epoch": 0.17184057503896435, "grad_norm": 1.9393428564071655, "learning_rate": 9.481762158558766e-06, "loss": 0.848, "step": 4865 }, { "epoch": 0.17187589684267227, "grad_norm": 1.8546286821365356, "learning_rate": 9.481508533105568e-06, "loss": 0.8707, "step": 4866 }, { "epoch": 0.17191121864638018, "grad_norm": 1.7268626689910889, "learning_rate": 9.48125484899923e-06, "loss": 0.8399, "step": 4867 }, { "epoch": 0.1719465404500881, "grad_norm": 1.700605869293213, "learning_rate": 9.48100110624307e-06, "loss": 0.8839, "step": 4868 }, { "epoch": 0.171981862253796, "grad_norm": 1.8321092128753662, "learning_rate": 9.480747304840413e-06, "loss": 0.8743, "step": 4869 }, { "epoch": 0.1720171840575039, "grad_norm": 1.7513891458511353, "learning_rate": 9.480493444794574e-06, "loss": 0.8551, "step": 4870 }, { "epoch": 0.1720525058612118, "grad_norm": 1.784515619277954, "learning_rate": 9.48023952610888e-06, "loss": 0.8495, "step": 4871 }, { "epoch": 0.1720878276649197, "grad_norm": 1.6780527830123901, "learning_rate": 9.479985548786654e-06, "loss": 0.8397, "step": 4872 }, { "epoch": 0.1721231494686276, "grad_norm": 1.682801604270935, "learning_rate": 9.479731512831219e-06, "loss": 0.8856, "step": 4873 }, { "epoch": 0.17215847127233552, "grad_norm": 1.7945871353149414, "learning_rate": 9.4794774182459e-06, "loss": 0.8298, "step": 4874 }, { "epoch": 0.17219379307604343, "grad_norm": 1.9780088663101196, "learning_rate": 9.479223265034021e-06, "loss": 0.8839, "step": 4875 }, { "epoch": 0.17222911487975134, "grad_norm": 1.755731225013733, "learning_rate": 9.478969053198913e-06, "loss": 0.8582, "step": 4876 }, { "epoch": 0.17226443668345925, "grad_norm": 2.049407958984375, "learning_rate": 9.478714782743897e-06, "loss": 0.8727, "step": 4877 }, { "epoch": 0.17229975848716714, "grad_norm": 2.128830671310425, "learning_rate": 9.478460453672305e-06, "loss": 0.8866, "step": 4878 }, { "epoch": 0.17233508029087505, "grad_norm": 1.8545013666152954, "learning_rate": 9.478206065987465e-06, "loss": 0.8504, "step": 4879 }, { "epoch": 0.17237040209458296, "grad_norm": 2.475745916366577, "learning_rate": 9.477951619692705e-06, "loss": 0.8757, "step": 4880 }, { "epoch": 0.17240572389829087, "grad_norm": 2.0648913383483887, "learning_rate": 9.477697114791356e-06, "loss": 0.8776, "step": 4881 }, { "epoch": 0.17244104570199878, "grad_norm": 1.826177954673767, "learning_rate": 9.47744255128675e-06, "loss": 0.8823, "step": 4882 }, { "epoch": 0.1724763675057067, "grad_norm": 1.9063860177993774, "learning_rate": 9.477187929182217e-06, "loss": 0.8963, "step": 4883 }, { "epoch": 0.17251168930941457, "grad_norm": 2.52262282371521, "learning_rate": 9.476933248481089e-06, "loss": 0.8079, "step": 4884 }, { "epoch": 0.17254701111312248, "grad_norm": 2.4491629600524902, "learning_rate": 9.4766785091867e-06, "loss": 0.8468, "step": 4885 }, { "epoch": 0.1725823329168304, "grad_norm": 1.7674992084503174, "learning_rate": 9.476423711302386e-06, "loss": 0.851, "step": 4886 }, { "epoch": 0.1726176547205383, "grad_norm": 1.9476791620254517, "learning_rate": 9.47616885483148e-06, "loss": 0.8578, "step": 4887 }, { "epoch": 0.17265297652424622, "grad_norm": 1.989750623703003, "learning_rate": 9.475913939777316e-06, "loss": 0.8631, "step": 4888 }, { "epoch": 0.17268829832795413, "grad_norm": 2.062547445297241, "learning_rate": 9.475658966143232e-06, "loss": 0.898, "step": 4889 }, { "epoch": 0.17272362013166204, "grad_norm": 1.8481663465499878, "learning_rate": 9.475403933932565e-06, "loss": 0.8742, "step": 4890 }, { "epoch": 0.17275894193536992, "grad_norm": 1.895594835281372, "learning_rate": 9.475148843148652e-06, "loss": 0.8868, "step": 4891 }, { "epoch": 0.17279426373907783, "grad_norm": 1.661016583442688, "learning_rate": 9.474893693794833e-06, "loss": 0.8557, "step": 4892 }, { "epoch": 0.17282958554278574, "grad_norm": 1.7895363569259644, "learning_rate": 9.474638485874448e-06, "loss": 0.8693, "step": 4893 }, { "epoch": 0.17286490734649365, "grad_norm": 1.7679466009140015, "learning_rate": 9.474383219390833e-06, "loss": 0.82, "step": 4894 }, { "epoch": 0.17290022915020156, "grad_norm": 1.74530029296875, "learning_rate": 9.474127894347332e-06, "loss": 0.8515, "step": 4895 }, { "epoch": 0.17293555095390947, "grad_norm": 1.7977067232131958, "learning_rate": 9.473872510747287e-06, "loss": 0.8651, "step": 4896 }, { "epoch": 0.17297087275761736, "grad_norm": 1.7974724769592285, "learning_rate": 9.47361706859404e-06, "loss": 0.8583, "step": 4897 }, { "epoch": 0.17300619456132527, "grad_norm": 1.9258856773376465, "learning_rate": 9.473361567890934e-06, "loss": 0.8671, "step": 4898 }, { "epoch": 0.17304151636503318, "grad_norm": 2.035026788711548, "learning_rate": 9.47310600864131e-06, "loss": 0.8732, "step": 4899 }, { "epoch": 0.1730768381687411, "grad_norm": 2.0133254528045654, "learning_rate": 9.472850390848517e-06, "loss": 0.8563, "step": 4900 }, { "epoch": 0.173112159972449, "grad_norm": 1.94316565990448, "learning_rate": 9.4725947145159e-06, "loss": 0.834, "step": 4901 }, { "epoch": 0.1731474817761569, "grad_norm": 2.1264400482177734, "learning_rate": 9.472338979646802e-06, "loss": 0.8912, "step": 4902 }, { "epoch": 0.17318280357986482, "grad_norm": 2.2739226818084717, "learning_rate": 9.472083186244573e-06, "loss": 0.8515, "step": 4903 }, { "epoch": 0.1732181253835727, "grad_norm": 2.0857863426208496, "learning_rate": 9.471827334312561e-06, "loss": 0.817, "step": 4904 }, { "epoch": 0.1732534471872806, "grad_norm": 2.1931440830230713, "learning_rate": 9.471571423854113e-06, "loss": 0.8914, "step": 4905 }, { "epoch": 0.17328876899098852, "grad_norm": 3.148566722869873, "learning_rate": 9.471315454872578e-06, "loss": 0.8843, "step": 4906 }, { "epoch": 0.17332409079469643, "grad_norm": 2.002163887023926, "learning_rate": 9.471059427371308e-06, "loss": 0.8458, "step": 4907 }, { "epoch": 0.17335941259840434, "grad_norm": 1.8730170726776123, "learning_rate": 9.470803341353652e-06, "loss": 0.8642, "step": 4908 }, { "epoch": 0.17339473440211225, "grad_norm": 1.8067331314086914, "learning_rate": 9.470547196822961e-06, "loss": 0.86, "step": 4909 }, { "epoch": 0.17343005620582014, "grad_norm": 1.9387223720550537, "learning_rate": 9.47029099378259e-06, "loss": 0.8431, "step": 4910 }, { "epoch": 0.17346537800952805, "grad_norm": 1.880462408065796, "learning_rate": 9.47003473223589e-06, "loss": 0.8282, "step": 4911 }, { "epoch": 0.17350069981323596, "grad_norm": 1.9366767406463623, "learning_rate": 9.469778412186217e-06, "loss": 0.8693, "step": 4912 }, { "epoch": 0.17353602161694387, "grad_norm": 2.040713310241699, "learning_rate": 9.469522033636925e-06, "loss": 0.899, "step": 4913 }, { "epoch": 0.17357134342065178, "grad_norm": 2.0102765560150146, "learning_rate": 9.469265596591366e-06, "loss": 0.8663, "step": 4914 }, { "epoch": 0.1736066652243597, "grad_norm": 1.8385854959487915, "learning_rate": 9.469009101052902e-06, "loss": 0.8601, "step": 4915 }, { "epoch": 0.1736419870280676, "grad_norm": 1.7783478498458862, "learning_rate": 9.468752547024886e-06, "loss": 0.8844, "step": 4916 }, { "epoch": 0.17367730883177548, "grad_norm": 2.017143487930298, "learning_rate": 9.468495934510675e-06, "loss": 0.8491, "step": 4917 }, { "epoch": 0.1737126306354834, "grad_norm": 1.9273098707199097, "learning_rate": 9.46823926351363e-06, "loss": 0.8705, "step": 4918 }, { "epoch": 0.1737479524391913, "grad_norm": 1.9312409162521362, "learning_rate": 9.46798253403711e-06, "loss": 0.8717, "step": 4919 }, { "epoch": 0.17378327424289922, "grad_norm": 2.136035203933716, "learning_rate": 9.467725746084475e-06, "loss": 0.8643, "step": 4920 }, { "epoch": 0.17381859604660713, "grad_norm": 1.897693157196045, "learning_rate": 9.467468899659085e-06, "loss": 0.8785, "step": 4921 }, { "epoch": 0.17385391785031504, "grad_norm": 2.1042728424072266, "learning_rate": 9.4672119947643e-06, "loss": 0.8591, "step": 4922 }, { "epoch": 0.17388923965402292, "grad_norm": 2.0262184143066406, "learning_rate": 9.466955031403487e-06, "loss": 0.838, "step": 4923 }, { "epoch": 0.17392456145773083, "grad_norm": 1.9399245977401733, "learning_rate": 9.466698009580003e-06, "loss": 0.8238, "step": 4924 }, { "epoch": 0.17395988326143874, "grad_norm": 1.806262731552124, "learning_rate": 9.466440929297218e-06, "loss": 0.8508, "step": 4925 }, { "epoch": 0.17399520506514665, "grad_norm": 1.925883412361145, "learning_rate": 9.466183790558494e-06, "loss": 0.8762, "step": 4926 }, { "epoch": 0.17403052686885456, "grad_norm": 1.800893783569336, "learning_rate": 9.465926593367194e-06, "loss": 0.8616, "step": 4927 }, { "epoch": 0.17406584867256247, "grad_norm": 1.9584338665008545, "learning_rate": 9.465669337726688e-06, "loss": 0.8764, "step": 4928 }, { "epoch": 0.17410117047627038, "grad_norm": 3.304926633834839, "learning_rate": 9.46541202364034e-06, "loss": 0.8541, "step": 4929 }, { "epoch": 0.17413649227997827, "grad_norm": 1.880275011062622, "learning_rate": 9.465154651111519e-06, "loss": 0.8472, "step": 4930 }, { "epoch": 0.17417181408368618, "grad_norm": 1.8025705814361572, "learning_rate": 9.464897220143594e-06, "loss": 0.8533, "step": 4931 }, { "epoch": 0.1742071358873941, "grad_norm": 1.9324456453323364, "learning_rate": 9.464639730739932e-06, "loss": 0.8773, "step": 4932 }, { "epoch": 0.174242457691102, "grad_norm": 1.972509741783142, "learning_rate": 9.464382182903905e-06, "loss": 0.8435, "step": 4933 }, { "epoch": 0.1742777794948099, "grad_norm": 2.0829057693481445, "learning_rate": 9.464124576638884e-06, "loss": 0.813, "step": 4934 }, { "epoch": 0.17431310129851782, "grad_norm": 1.6821539402008057, "learning_rate": 9.463866911948239e-06, "loss": 0.831, "step": 4935 }, { "epoch": 0.1743484231022257, "grad_norm": 1.710702896118164, "learning_rate": 9.463609188835343e-06, "loss": 0.8845, "step": 4936 }, { "epoch": 0.1743837449059336, "grad_norm": 2.061281204223633, "learning_rate": 9.46335140730357e-06, "loss": 0.8749, "step": 4937 }, { "epoch": 0.17441906670964152, "grad_norm": 2.1035096645355225, "learning_rate": 9.46309356735629e-06, "loss": 0.8331, "step": 4938 }, { "epoch": 0.17445438851334943, "grad_norm": 1.96645987033844, "learning_rate": 9.462835668996884e-06, "loss": 0.8729, "step": 4939 }, { "epoch": 0.17448971031705734, "grad_norm": 1.789821982383728, "learning_rate": 9.462577712228721e-06, "loss": 0.8761, "step": 4940 }, { "epoch": 0.17452503212076526, "grad_norm": 1.8388973474502563, "learning_rate": 9.46231969705518e-06, "loss": 0.8612, "step": 4941 }, { "epoch": 0.17456035392447317, "grad_norm": 1.7983225584030151, "learning_rate": 9.46206162347964e-06, "loss": 0.8861, "step": 4942 }, { "epoch": 0.17459567572818105, "grad_norm": 1.6854363679885864, "learning_rate": 9.461803491505475e-06, "loss": 0.8428, "step": 4943 }, { "epoch": 0.17463099753188896, "grad_norm": 1.9184575080871582, "learning_rate": 9.461545301136063e-06, "loss": 0.8452, "step": 4944 }, { "epoch": 0.17466631933559687, "grad_norm": 1.7678618431091309, "learning_rate": 9.461287052374786e-06, "loss": 0.8803, "step": 4945 }, { "epoch": 0.17470164113930478, "grad_norm": 2.085618734359741, "learning_rate": 9.461028745225023e-06, "loss": 0.8551, "step": 4946 }, { "epoch": 0.1747369629430127, "grad_norm": 2.0468411445617676, "learning_rate": 9.460770379690154e-06, "loss": 0.8808, "step": 4947 }, { "epoch": 0.1747722847467206, "grad_norm": 1.852847695350647, "learning_rate": 9.460511955773562e-06, "loss": 0.8617, "step": 4948 }, { "epoch": 0.17480760655042848, "grad_norm": 1.8144025802612305, "learning_rate": 9.460253473478627e-06, "loss": 0.8749, "step": 4949 }, { "epoch": 0.1748429283541364, "grad_norm": 1.6728136539459229, "learning_rate": 9.459994932808731e-06, "loss": 0.8608, "step": 4950 }, { "epoch": 0.1748782501578443, "grad_norm": 1.92396879196167, "learning_rate": 9.459736333767262e-06, "loss": 0.8757, "step": 4951 }, { "epoch": 0.17491357196155222, "grad_norm": 2.0623559951782227, "learning_rate": 9.459477676357603e-06, "loss": 0.8822, "step": 4952 }, { "epoch": 0.17494889376526013, "grad_norm": 1.8152878284454346, "learning_rate": 9.459218960583137e-06, "loss": 0.8598, "step": 4953 }, { "epoch": 0.17498421556896804, "grad_norm": 1.885914921760559, "learning_rate": 9.458960186447252e-06, "loss": 0.8276, "step": 4954 }, { "epoch": 0.17501953737267595, "grad_norm": 1.7728737592697144, "learning_rate": 9.458701353953333e-06, "loss": 0.8753, "step": 4955 }, { "epoch": 0.17505485917638383, "grad_norm": 1.9139819145202637, "learning_rate": 9.458442463104772e-06, "loss": 0.8544, "step": 4956 }, { "epoch": 0.17509018098009174, "grad_norm": 1.585746169090271, "learning_rate": 9.45818351390495e-06, "loss": 0.884, "step": 4957 }, { "epoch": 0.17512550278379965, "grad_norm": 1.7406944036483765, "learning_rate": 9.457924506357264e-06, "loss": 0.8735, "step": 4958 }, { "epoch": 0.17516082458750756, "grad_norm": 1.8206357955932617, "learning_rate": 9.457665440465097e-06, "loss": 0.8912, "step": 4959 }, { "epoch": 0.17519614639121547, "grad_norm": 1.8889795541763306, "learning_rate": 9.457406316231845e-06, "loss": 0.8513, "step": 4960 }, { "epoch": 0.17523146819492338, "grad_norm": 1.865714192390442, "learning_rate": 9.457147133660896e-06, "loss": 0.8423, "step": 4961 }, { "epoch": 0.17526678999863127, "grad_norm": 1.988797664642334, "learning_rate": 9.456887892755642e-06, "loss": 0.8376, "step": 4962 }, { "epoch": 0.17530211180233918, "grad_norm": 1.950392484664917, "learning_rate": 9.45662859351948e-06, "loss": 0.8683, "step": 4963 }, { "epoch": 0.1753374336060471, "grad_norm": 1.8401713371276855, "learning_rate": 9.456369235955798e-06, "loss": 0.9065, "step": 4964 }, { "epoch": 0.175372755409755, "grad_norm": 1.9172545671463013, "learning_rate": 9.456109820067994e-06, "loss": 0.9149, "step": 4965 }, { "epoch": 0.1754080772134629, "grad_norm": 1.630947470664978, "learning_rate": 9.455850345859462e-06, "loss": 0.8433, "step": 4966 }, { "epoch": 0.17544339901717082, "grad_norm": 2.2175424098968506, "learning_rate": 9.455590813333598e-06, "loss": 0.8632, "step": 4967 }, { "epoch": 0.17547872082087873, "grad_norm": 1.9098143577575684, "learning_rate": 9.4553312224938e-06, "loss": 0.8841, "step": 4968 }, { "epoch": 0.1755140426245866, "grad_norm": 2.0813021659851074, "learning_rate": 9.455071573343464e-06, "loss": 0.8472, "step": 4969 }, { "epoch": 0.17554936442829452, "grad_norm": 1.8188539743423462, "learning_rate": 9.45481186588599e-06, "loss": 0.8744, "step": 4970 }, { "epoch": 0.17558468623200243, "grad_norm": 1.8414167165756226, "learning_rate": 9.454552100124774e-06, "loss": 0.8697, "step": 4971 }, { "epoch": 0.17562000803571035, "grad_norm": 1.8046693801879883, "learning_rate": 9.454292276063216e-06, "loss": 0.8582, "step": 4972 }, { "epoch": 0.17565532983941826, "grad_norm": 1.6377801895141602, "learning_rate": 9.454032393704723e-06, "loss": 0.8359, "step": 4973 }, { "epoch": 0.17569065164312617, "grad_norm": 1.7736592292785645, "learning_rate": 9.453772453052688e-06, "loss": 0.8346, "step": 4974 }, { "epoch": 0.17572597344683405, "grad_norm": 1.8189607858657837, "learning_rate": 9.453512454110516e-06, "loss": 0.8277, "step": 4975 }, { "epoch": 0.17576129525054196, "grad_norm": 1.764135479927063, "learning_rate": 9.45325239688161e-06, "loss": 0.8479, "step": 4976 }, { "epoch": 0.17579661705424987, "grad_norm": 1.7904661893844604, "learning_rate": 9.452992281369375e-06, "loss": 0.8675, "step": 4977 }, { "epoch": 0.17583193885795778, "grad_norm": 1.8767765760421753, "learning_rate": 9.452732107577214e-06, "loss": 0.8836, "step": 4978 }, { "epoch": 0.1758672606616657, "grad_norm": 1.9237196445465088, "learning_rate": 9.452471875508532e-06, "loss": 0.8295, "step": 4979 }, { "epoch": 0.1759025824653736, "grad_norm": 1.7987008094787598, "learning_rate": 9.452211585166736e-06, "loss": 0.8405, "step": 4980 }, { "epoch": 0.1759379042690815, "grad_norm": 2.0450878143310547, "learning_rate": 9.451951236555231e-06, "loss": 0.8856, "step": 4981 }, { "epoch": 0.1759732260727894, "grad_norm": 1.8156709671020508, "learning_rate": 9.451690829677424e-06, "loss": 0.8326, "step": 4982 }, { "epoch": 0.1760085478764973, "grad_norm": 2.1898467540740967, "learning_rate": 9.451430364536725e-06, "loss": 0.8484, "step": 4983 }, { "epoch": 0.17604386968020522, "grad_norm": 1.9230525493621826, "learning_rate": 9.451169841136541e-06, "loss": 0.8701, "step": 4984 }, { "epoch": 0.17607919148391313, "grad_norm": 2.2284696102142334, "learning_rate": 9.450909259480286e-06, "loss": 0.8662, "step": 4985 }, { "epoch": 0.17611451328762104, "grad_norm": 2.1024832725524902, "learning_rate": 9.450648619571364e-06, "loss": 0.8401, "step": 4986 }, { "epoch": 0.17614983509132895, "grad_norm": 1.8423285484313965, "learning_rate": 9.450387921413192e-06, "loss": 0.8699, "step": 4987 }, { "epoch": 0.17618515689503683, "grad_norm": 1.9009705781936646, "learning_rate": 9.450127165009177e-06, "loss": 0.8786, "step": 4988 }, { "epoch": 0.17622047869874474, "grad_norm": 2.021261215209961, "learning_rate": 9.449866350362735e-06, "loss": 0.8631, "step": 4989 }, { "epoch": 0.17625580050245265, "grad_norm": 1.8602783679962158, "learning_rate": 9.449605477477279e-06, "loss": 0.8515, "step": 4990 }, { "epoch": 0.17629112230616056, "grad_norm": 1.7896111011505127, "learning_rate": 9.449344546356224e-06, "loss": 0.8583, "step": 4991 }, { "epoch": 0.17632644410986847, "grad_norm": 1.6961348056793213, "learning_rate": 9.449083557002982e-06, "loss": 0.8878, "step": 4992 }, { "epoch": 0.17636176591357638, "grad_norm": 1.7206130027770996, "learning_rate": 9.448822509420973e-06, "loss": 0.835, "step": 4993 }, { "epoch": 0.1763970877172843, "grad_norm": 2.2252495288848877, "learning_rate": 9.44856140361361e-06, "loss": 0.8399, "step": 4994 }, { "epoch": 0.17643240952099218, "grad_norm": 1.8201788663864136, "learning_rate": 9.448300239584312e-06, "loss": 0.8399, "step": 4995 }, { "epoch": 0.1764677313247001, "grad_norm": 1.8215585947036743, "learning_rate": 9.448039017336496e-06, "loss": 0.8772, "step": 4996 }, { "epoch": 0.176503053128408, "grad_norm": 1.7141700983047485, "learning_rate": 9.447777736873582e-06, "loss": 0.8262, "step": 4997 }, { "epoch": 0.1765383749321159, "grad_norm": 1.7235678434371948, "learning_rate": 9.447516398198989e-06, "loss": 0.829, "step": 4998 }, { "epoch": 0.17657369673582382, "grad_norm": 2.203792095184326, "learning_rate": 9.447255001316137e-06, "loss": 0.8727, "step": 4999 }, { "epoch": 0.17660901853953173, "grad_norm": 1.9730814695358276, "learning_rate": 9.446993546228448e-06, "loss": 0.8678, "step": 5000 }, { "epoch": 0.17664434034323961, "grad_norm": 1.7834265232086182, "learning_rate": 9.446732032939342e-06, "loss": 0.8544, "step": 5001 }, { "epoch": 0.17667966214694752, "grad_norm": 1.938446283340454, "learning_rate": 9.446470461452247e-06, "loss": 0.8423, "step": 5002 }, { "epoch": 0.17671498395065544, "grad_norm": 1.9417855739593506, "learning_rate": 9.446208831770579e-06, "loss": 0.8863, "step": 5003 }, { "epoch": 0.17675030575436335, "grad_norm": 2.1770431995391846, "learning_rate": 9.445947143897766e-06, "loss": 0.8916, "step": 5004 }, { "epoch": 0.17678562755807126, "grad_norm": 1.7719377279281616, "learning_rate": 9.445685397837232e-06, "loss": 0.853, "step": 5005 }, { "epoch": 0.17682094936177917, "grad_norm": 1.9983797073364258, "learning_rate": 9.445423593592404e-06, "loss": 0.8579, "step": 5006 }, { "epoch": 0.17685627116548708, "grad_norm": 1.996262550354004, "learning_rate": 9.445161731166708e-06, "loss": 0.8888, "step": 5007 }, { "epoch": 0.17689159296919496, "grad_norm": 1.80540931224823, "learning_rate": 9.444899810563572e-06, "loss": 0.882, "step": 5008 }, { "epoch": 0.17692691477290287, "grad_norm": 1.9364583492279053, "learning_rate": 9.44463783178642e-06, "loss": 0.8673, "step": 5009 }, { "epoch": 0.17696223657661078, "grad_norm": 2.6800928115844727, "learning_rate": 9.444375794838684e-06, "loss": 0.8278, "step": 5010 }, { "epoch": 0.1769975583803187, "grad_norm": 1.9027019739151, "learning_rate": 9.444113699723796e-06, "loss": 0.8489, "step": 5011 }, { "epoch": 0.1770328801840266, "grad_norm": 1.6974303722381592, "learning_rate": 9.443851546445182e-06, "loss": 0.8713, "step": 5012 }, { "epoch": 0.1770682019877345, "grad_norm": 1.8216605186462402, "learning_rate": 9.443589335006273e-06, "loss": 0.8491, "step": 5013 }, { "epoch": 0.1771035237914424, "grad_norm": 1.6825525760650635, "learning_rate": 9.443327065410502e-06, "loss": 0.8703, "step": 5014 }, { "epoch": 0.1771388455951503, "grad_norm": 1.7855985164642334, "learning_rate": 9.443064737661303e-06, "loss": 0.8623, "step": 5015 }, { "epoch": 0.17717416739885822, "grad_norm": 1.692976474761963, "learning_rate": 9.442802351762106e-06, "loss": 0.8517, "step": 5016 }, { "epoch": 0.17720948920256613, "grad_norm": 1.7894806861877441, "learning_rate": 9.44253990771635e-06, "loss": 0.8367, "step": 5017 }, { "epoch": 0.17724481100627404, "grad_norm": 10.391806602478027, "learning_rate": 9.442277405527464e-06, "loss": 0.9168, "step": 5018 }, { "epoch": 0.17728013280998195, "grad_norm": 1.867655634880066, "learning_rate": 9.442014845198888e-06, "loss": 0.8592, "step": 5019 }, { "epoch": 0.17731545461368986, "grad_norm": 1.6851434707641602, "learning_rate": 9.441752226734056e-06, "loss": 0.8417, "step": 5020 }, { "epoch": 0.17735077641739774, "grad_norm": 1.858893871307373, "learning_rate": 9.441489550136408e-06, "loss": 0.8665, "step": 5021 }, { "epoch": 0.17738609822110565, "grad_norm": 1.7138526439666748, "learning_rate": 9.441226815409377e-06, "loss": 0.8476, "step": 5022 }, { "epoch": 0.17742142002481356, "grad_norm": 1.8118797540664673, "learning_rate": 9.440964022556404e-06, "loss": 0.8501, "step": 5023 }, { "epoch": 0.17745674182852147, "grad_norm": 1.675083875656128, "learning_rate": 9.44070117158093e-06, "loss": 0.8436, "step": 5024 }, { "epoch": 0.17749206363222939, "grad_norm": 1.6165177822113037, "learning_rate": 9.440438262486394e-06, "loss": 0.8362, "step": 5025 }, { "epoch": 0.1775273854359373, "grad_norm": 1.9376784563064575, "learning_rate": 9.440175295276234e-06, "loss": 0.865, "step": 5026 }, { "epoch": 0.17756270723964518, "grad_norm": 1.6541199684143066, "learning_rate": 9.439912269953897e-06, "loss": 0.8362, "step": 5027 }, { "epoch": 0.1775980290433531, "grad_norm": 1.607694387435913, "learning_rate": 9.439649186522824e-06, "loss": 0.8082, "step": 5028 }, { "epoch": 0.177633350847061, "grad_norm": 1.9185469150543213, "learning_rate": 9.439386044986453e-06, "loss": 0.8825, "step": 5029 }, { "epoch": 0.1776686726507689, "grad_norm": 1.110137939453125, "learning_rate": 9.439122845348237e-06, "loss": 0.6125, "step": 5030 }, { "epoch": 0.17770399445447682, "grad_norm": 0.9642953872680664, "learning_rate": 9.438859587611612e-06, "loss": 0.6183, "step": 5031 }, { "epoch": 0.17773931625818473, "grad_norm": 1.985571265220642, "learning_rate": 9.438596271780029e-06, "loss": 0.8319, "step": 5032 }, { "epoch": 0.17777463806189264, "grad_norm": 2.2122058868408203, "learning_rate": 9.43833289785693e-06, "loss": 0.8638, "step": 5033 }, { "epoch": 0.17780995986560053, "grad_norm": 1.8563332557678223, "learning_rate": 9.438069465845767e-06, "loss": 0.8498, "step": 5034 }, { "epoch": 0.17784528166930844, "grad_norm": 1.9617531299591064, "learning_rate": 9.437805975749983e-06, "loss": 0.85, "step": 5035 }, { "epoch": 0.17788060347301635, "grad_norm": 1.7485501766204834, "learning_rate": 9.43754242757303e-06, "loss": 0.871, "step": 5036 }, { "epoch": 0.17791592527672426, "grad_norm": 1.8440626859664917, "learning_rate": 9.437278821318357e-06, "loss": 0.8774, "step": 5037 }, { "epoch": 0.17795124708043217, "grad_norm": 2.411978006362915, "learning_rate": 9.43701515698941e-06, "loss": 0.8678, "step": 5038 }, { "epoch": 0.17798656888414008, "grad_norm": 1.833374261856079, "learning_rate": 9.436751434589644e-06, "loss": 0.8385, "step": 5039 }, { "epoch": 0.17802189068784796, "grad_norm": 1.7454394102096558, "learning_rate": 9.43648765412251e-06, "loss": 0.8287, "step": 5040 }, { "epoch": 0.17805721249155587, "grad_norm": 1.7037886381149292, "learning_rate": 9.43622381559146e-06, "loss": 0.8649, "step": 5041 }, { "epoch": 0.17809253429526378, "grad_norm": 1.6960080862045288, "learning_rate": 9.435959918999943e-06, "loss": 0.8646, "step": 5042 }, { "epoch": 0.1781278560989717, "grad_norm": 1.8543506860733032, "learning_rate": 9.435695964351418e-06, "loss": 0.8932, "step": 5043 }, { "epoch": 0.1781631779026796, "grad_norm": 1.801566481590271, "learning_rate": 9.43543195164934e-06, "loss": 0.8718, "step": 5044 }, { "epoch": 0.17819849970638751, "grad_norm": 2.2778475284576416, "learning_rate": 9.435167880897162e-06, "loss": 0.8769, "step": 5045 }, { "epoch": 0.17823382151009542, "grad_norm": 1.8045705556869507, "learning_rate": 9.43490375209834e-06, "loss": 0.8473, "step": 5046 }, { "epoch": 0.1782691433138033, "grad_norm": 1.640001654624939, "learning_rate": 9.434639565256332e-06, "loss": 0.8442, "step": 5047 }, { "epoch": 0.17830446511751122, "grad_norm": 1.8144744634628296, "learning_rate": 9.434375320374594e-06, "loss": 0.8933, "step": 5048 }, { "epoch": 0.17833978692121913, "grad_norm": 1.5631742477416992, "learning_rate": 9.434111017456583e-06, "loss": 0.8404, "step": 5049 }, { "epoch": 0.17837510872492704, "grad_norm": 1.7536722421646118, "learning_rate": 9.433846656505766e-06, "loss": 0.8766, "step": 5050 }, { "epoch": 0.17841043052863495, "grad_norm": 1.649119257926941, "learning_rate": 9.433582237525594e-06, "loss": 0.8682, "step": 5051 }, { "epoch": 0.17844575233234286, "grad_norm": 1.756007432937622, "learning_rate": 9.433317760519531e-06, "loss": 0.8498, "step": 5052 }, { "epoch": 0.17848107413605074, "grad_norm": 1.6074804067611694, "learning_rate": 9.433053225491039e-06, "loss": 0.8502, "step": 5053 }, { "epoch": 0.17851639593975865, "grad_norm": 2.440490245819092, "learning_rate": 9.43278863244358e-06, "loss": 0.8604, "step": 5054 }, { "epoch": 0.17855171774346656, "grad_norm": 1.975780725479126, "learning_rate": 9.432523981380617e-06, "loss": 0.8802, "step": 5055 }, { "epoch": 0.17858703954717448, "grad_norm": 1.5639774799346924, "learning_rate": 9.432259272305613e-06, "loss": 0.8225, "step": 5056 }, { "epoch": 0.17862236135088239, "grad_norm": 2.1137797832489014, "learning_rate": 9.431994505222031e-06, "loss": 0.872, "step": 5057 }, { "epoch": 0.1786576831545903, "grad_norm": 1.6464565992355347, "learning_rate": 9.43172968013334e-06, "loss": 0.883, "step": 5058 }, { "epoch": 0.1786930049582982, "grad_norm": 1.725623607635498, "learning_rate": 9.431464797043003e-06, "loss": 0.8726, "step": 5059 }, { "epoch": 0.1787283267620061, "grad_norm": 1.8225347995758057, "learning_rate": 9.431199855954489e-06, "loss": 0.8377, "step": 5060 }, { "epoch": 0.178763648565714, "grad_norm": 1.9731310606002808, "learning_rate": 9.430934856871265e-06, "loss": 0.8613, "step": 5061 }, { "epoch": 0.1787989703694219, "grad_norm": 1.8283896446228027, "learning_rate": 9.430669799796796e-06, "loss": 0.869, "step": 5062 }, { "epoch": 0.17883429217312982, "grad_norm": 1.7631621360778809, "learning_rate": 9.430404684734555e-06, "loss": 0.8514, "step": 5063 }, { "epoch": 0.17886961397683773, "grad_norm": 1.8013859987258911, "learning_rate": 9.43013951168801e-06, "loss": 0.834, "step": 5064 }, { "epoch": 0.17890493578054564, "grad_norm": 1.7124837636947632, "learning_rate": 9.429874280660633e-06, "loss": 0.8767, "step": 5065 }, { "epoch": 0.17894025758425355, "grad_norm": 1.8917146921157837, "learning_rate": 9.429608991655894e-06, "loss": 0.8444, "step": 5066 }, { "epoch": 0.17897557938796144, "grad_norm": 1.7512032985687256, "learning_rate": 9.429343644677263e-06, "loss": 0.8761, "step": 5067 }, { "epoch": 0.17901090119166935, "grad_norm": 1.692115068435669, "learning_rate": 9.429078239728216e-06, "loss": 0.9271, "step": 5068 }, { "epoch": 0.17904622299537726, "grad_norm": 1.5427218675613403, "learning_rate": 9.428812776812227e-06, "loss": 0.8627, "step": 5069 }, { "epoch": 0.17908154479908517, "grad_norm": 1.7659770250320435, "learning_rate": 9.428547255932767e-06, "loss": 0.8462, "step": 5070 }, { "epoch": 0.17911686660279308, "grad_norm": 1.7438383102416992, "learning_rate": 9.428281677093314e-06, "loss": 0.8708, "step": 5071 }, { "epoch": 0.179152188406501, "grad_norm": 1.7461975812911987, "learning_rate": 9.428016040297342e-06, "loss": 0.8559, "step": 5072 }, { "epoch": 0.17918751021020887, "grad_norm": 1.7806029319763184, "learning_rate": 9.42775034554833e-06, "loss": 0.8464, "step": 5073 }, { "epoch": 0.17922283201391678, "grad_norm": 1.6003278493881226, "learning_rate": 9.427484592849753e-06, "loss": 0.8738, "step": 5074 }, { "epoch": 0.1792581538176247, "grad_norm": 1.6211649179458618, "learning_rate": 9.42721878220509e-06, "loss": 0.8777, "step": 5075 }, { "epoch": 0.1792934756213326, "grad_norm": 1.6607173681259155, "learning_rate": 9.42695291361782e-06, "loss": 0.8441, "step": 5076 }, { "epoch": 0.17932879742504051, "grad_norm": 1.7090836763381958, "learning_rate": 9.426686987091421e-06, "loss": 0.8442, "step": 5077 }, { "epoch": 0.17936411922874843, "grad_norm": 1.849307894706726, "learning_rate": 9.426421002629376e-06, "loss": 0.8436, "step": 5078 }, { "epoch": 0.17939944103245634, "grad_norm": 1.6668707132339478, "learning_rate": 9.426154960235166e-06, "loss": 0.8581, "step": 5079 }, { "epoch": 0.17943476283616422, "grad_norm": 1.8808945417404175, "learning_rate": 9.425888859912269e-06, "loss": 0.8755, "step": 5080 }, { "epoch": 0.17947008463987213, "grad_norm": 1.845088005065918, "learning_rate": 9.425622701664172e-06, "loss": 0.8499, "step": 5081 }, { "epoch": 0.17950540644358004, "grad_norm": 1.743117332458496, "learning_rate": 9.42535648549436e-06, "loss": 0.8521, "step": 5082 }, { "epoch": 0.17954072824728795, "grad_norm": 1.642554521560669, "learning_rate": 9.42509021140631e-06, "loss": 0.854, "step": 5083 }, { "epoch": 0.17957605005099586, "grad_norm": 1.7118512392044067, "learning_rate": 9.424823879403514e-06, "loss": 0.8756, "step": 5084 }, { "epoch": 0.17961137185470377, "grad_norm": 1.6034489870071411, "learning_rate": 9.424557489489453e-06, "loss": 0.8315, "step": 5085 }, { "epoch": 0.17964669365841165, "grad_norm": 1.9499640464782715, "learning_rate": 9.424291041667615e-06, "loss": 0.8632, "step": 5086 }, { "epoch": 0.17968201546211957, "grad_norm": 1.572414517402649, "learning_rate": 9.424024535941489e-06, "loss": 0.8402, "step": 5087 }, { "epoch": 0.17971733726582748, "grad_norm": 1.7518141269683838, "learning_rate": 9.42375797231456e-06, "loss": 0.846, "step": 5088 }, { "epoch": 0.1797526590695354, "grad_norm": 1.741776943206787, "learning_rate": 9.423491350790319e-06, "loss": 0.8363, "step": 5089 }, { "epoch": 0.1797879808732433, "grad_norm": 1.6854113340377808, "learning_rate": 9.423224671372255e-06, "loss": 0.8485, "step": 5090 }, { "epoch": 0.1798233026769512, "grad_norm": 1.5241585969924927, "learning_rate": 9.422957934063858e-06, "loss": 0.6558, "step": 5091 }, { "epoch": 0.17985862448065912, "grad_norm": 1.634872555732727, "learning_rate": 9.422691138868618e-06, "loss": 0.8448, "step": 5092 }, { "epoch": 0.179893946284367, "grad_norm": 1.9347572326660156, "learning_rate": 9.422424285790028e-06, "loss": 0.8443, "step": 5093 }, { "epoch": 0.1799292680880749, "grad_norm": 1.9188480377197266, "learning_rate": 9.422157374831581e-06, "loss": 0.8585, "step": 5094 }, { "epoch": 0.17996458989178282, "grad_norm": 1.76809823513031, "learning_rate": 9.421890405996769e-06, "loss": 0.8489, "step": 5095 }, { "epoch": 0.17999991169549073, "grad_norm": 1.7367404699325562, "learning_rate": 9.421623379289088e-06, "loss": 0.8861, "step": 5096 }, { "epoch": 0.18003523349919864, "grad_norm": 1.7536532878875732, "learning_rate": 9.421356294712031e-06, "loss": 0.9051, "step": 5097 }, { "epoch": 0.18007055530290655, "grad_norm": 1.7839150428771973, "learning_rate": 9.421089152269092e-06, "loss": 0.8649, "step": 5098 }, { "epoch": 0.18010587710661444, "grad_norm": 2.2174744606018066, "learning_rate": 9.42082195196377e-06, "loss": 0.8758, "step": 5099 }, { "epoch": 0.18014119891032235, "grad_norm": 1.7516666650772095, "learning_rate": 9.420554693799563e-06, "loss": 0.8908, "step": 5100 }, { "epoch": 0.18017652071403026, "grad_norm": 1.7031522989273071, "learning_rate": 9.420287377779967e-06, "loss": 0.8808, "step": 5101 }, { "epoch": 0.18021184251773817, "grad_norm": 1.7432751655578613, "learning_rate": 9.420020003908479e-06, "loss": 0.8688, "step": 5102 }, { "epoch": 0.18024716432144608, "grad_norm": 1.771824598312378, "learning_rate": 9.4197525721886e-06, "loss": 0.8686, "step": 5103 }, { "epoch": 0.180282486125154, "grad_norm": 2.0821585655212402, "learning_rate": 9.419485082623831e-06, "loss": 0.9031, "step": 5104 }, { "epoch": 0.1803178079288619, "grad_norm": 1.693410873413086, "learning_rate": 9.419217535217672e-06, "loss": 0.8509, "step": 5105 }, { "epoch": 0.18035312973256978, "grad_norm": 1.7768874168395996, "learning_rate": 9.418949929973624e-06, "loss": 0.869, "step": 5106 }, { "epoch": 0.1803884515362777, "grad_norm": 1.636234164237976, "learning_rate": 9.418682266895192e-06, "loss": 0.8754, "step": 5107 }, { "epoch": 0.1804237733399856, "grad_norm": 1.8586862087249756, "learning_rate": 9.418414545985875e-06, "loss": 0.861, "step": 5108 }, { "epoch": 0.18045909514369352, "grad_norm": 1.8061189651489258, "learning_rate": 9.418146767249179e-06, "loss": 0.9042, "step": 5109 }, { "epoch": 0.18049441694740143, "grad_norm": 1.6707878112792969, "learning_rate": 9.41787893068861e-06, "loss": 0.8479, "step": 5110 }, { "epoch": 0.18052973875110934, "grad_norm": 1.608719825744629, "learning_rate": 9.41761103630767e-06, "loss": 0.8551, "step": 5111 }, { "epoch": 0.18056506055481722, "grad_norm": 1.665961503982544, "learning_rate": 9.417343084109868e-06, "loss": 0.873, "step": 5112 }, { "epoch": 0.18060038235852513, "grad_norm": 1.6204229593276978, "learning_rate": 9.417075074098712e-06, "loss": 0.8377, "step": 5113 }, { "epoch": 0.18063570416223304, "grad_norm": 1.8072839975357056, "learning_rate": 9.416807006277707e-06, "loss": 0.8498, "step": 5114 }, { "epoch": 0.18067102596594095, "grad_norm": 1.6784976720809937, "learning_rate": 9.416538880650361e-06, "loss": 0.8431, "step": 5115 }, { "epoch": 0.18070634776964886, "grad_norm": 1.5903370380401611, "learning_rate": 9.416270697220184e-06, "loss": 0.8856, "step": 5116 }, { "epoch": 0.18074166957335677, "grad_norm": 1.7226476669311523, "learning_rate": 9.416002455990687e-06, "loss": 0.872, "step": 5117 }, { "epoch": 0.18077699137706468, "grad_norm": 1.7190115451812744, "learning_rate": 9.415734156965381e-06, "loss": 0.8342, "step": 5118 }, { "epoch": 0.18081231318077257, "grad_norm": 1.8193391561508179, "learning_rate": 9.415465800147775e-06, "loss": 0.828, "step": 5119 }, { "epoch": 0.18084763498448048, "grad_norm": 1.6762628555297852, "learning_rate": 9.415197385541384e-06, "loss": 0.8153, "step": 5120 }, { "epoch": 0.1808829567881884, "grad_norm": 1.8687835931777954, "learning_rate": 9.414928913149719e-06, "loss": 0.9004, "step": 5121 }, { "epoch": 0.1809182785918963, "grad_norm": 1.6885788440704346, "learning_rate": 9.414660382976293e-06, "loss": 0.8397, "step": 5122 }, { "epoch": 0.1809536003956042, "grad_norm": 1.5757250785827637, "learning_rate": 9.414391795024624e-06, "loss": 0.8641, "step": 5123 }, { "epoch": 0.18098892219931212, "grad_norm": 2.0781280994415283, "learning_rate": 9.414123149298224e-06, "loss": 0.8532, "step": 5124 }, { "epoch": 0.18102424400302, "grad_norm": 1.809731364250183, "learning_rate": 9.413854445800608e-06, "loss": 0.828, "step": 5125 }, { "epoch": 0.1810595658067279, "grad_norm": 1.6035575866699219, "learning_rate": 9.413585684535299e-06, "loss": 0.8664, "step": 5126 }, { "epoch": 0.18109488761043582, "grad_norm": 1.4966647624969482, "learning_rate": 9.413316865505806e-06, "loss": 0.8435, "step": 5127 }, { "epoch": 0.18113020941414373, "grad_norm": 1.8288438320159912, "learning_rate": 9.413047988715653e-06, "loss": 0.8666, "step": 5128 }, { "epoch": 0.18116553121785164, "grad_norm": 1.6019866466522217, "learning_rate": 9.412779054168357e-06, "loss": 0.8359, "step": 5129 }, { "epoch": 0.18120085302155955, "grad_norm": 1.706423044204712, "learning_rate": 9.41251006186744e-06, "loss": 0.8611, "step": 5130 }, { "epoch": 0.18123617482526747, "grad_norm": 1.6057387590408325, "learning_rate": 9.412241011816418e-06, "loss": 0.8794, "step": 5131 }, { "epoch": 0.18127149662897535, "grad_norm": 1.6610896587371826, "learning_rate": 9.411971904018818e-06, "loss": 0.864, "step": 5132 }, { "epoch": 0.18130681843268326, "grad_norm": 1.6625521183013916, "learning_rate": 9.411702738478159e-06, "loss": 0.9165, "step": 5133 }, { "epoch": 0.18134214023639117, "grad_norm": 1.7539091110229492, "learning_rate": 9.411433515197963e-06, "loss": 0.8975, "step": 5134 }, { "epoch": 0.18137746204009908, "grad_norm": 1.5974106788635254, "learning_rate": 9.411164234181754e-06, "loss": 0.8295, "step": 5135 }, { "epoch": 0.181412783843807, "grad_norm": 2.0494024753570557, "learning_rate": 9.410894895433056e-06, "loss": 0.8593, "step": 5136 }, { "epoch": 0.1814481056475149, "grad_norm": 1.852860927581787, "learning_rate": 9.410625498955396e-06, "loss": 0.8525, "step": 5137 }, { "epoch": 0.18148342745122278, "grad_norm": 1.7763580083847046, "learning_rate": 9.410356044752298e-06, "loss": 0.8317, "step": 5138 }, { "epoch": 0.1815187492549307, "grad_norm": 1.819777011871338, "learning_rate": 9.41008653282729e-06, "loss": 0.8581, "step": 5139 }, { "epoch": 0.1815540710586386, "grad_norm": 1.6660456657409668, "learning_rate": 9.409816963183897e-06, "loss": 0.8677, "step": 5140 }, { "epoch": 0.18158939286234652, "grad_norm": 2.0537936687469482, "learning_rate": 9.409547335825648e-06, "loss": 0.8686, "step": 5141 }, { "epoch": 0.18162471466605443, "grad_norm": 1.6974068880081177, "learning_rate": 9.409277650756074e-06, "loss": 0.839, "step": 5142 }, { "epoch": 0.18166003646976234, "grad_norm": 2.0240352153778076, "learning_rate": 9.409007907978703e-06, "loss": 0.8545, "step": 5143 }, { "epoch": 0.18169535827347025, "grad_norm": 1.8560856580734253, "learning_rate": 9.408738107497066e-06, "loss": 0.8854, "step": 5144 }, { "epoch": 0.18173068007717813, "grad_norm": 1.8585985898971558, "learning_rate": 9.408468249314692e-06, "loss": 0.8301, "step": 5145 }, { "epoch": 0.18176600188088604, "grad_norm": 1.8284302949905396, "learning_rate": 9.408198333435115e-06, "loss": 0.8773, "step": 5146 }, { "epoch": 0.18180132368459395, "grad_norm": 1.9596257209777832, "learning_rate": 9.407928359861866e-06, "loss": 0.8757, "step": 5147 }, { "epoch": 0.18183664548830186, "grad_norm": 1.9976911544799805, "learning_rate": 9.40765832859848e-06, "loss": 0.8465, "step": 5148 }, { "epoch": 0.18187196729200977, "grad_norm": 1.8892158269882202, "learning_rate": 9.40738823964849e-06, "loss": 0.8596, "step": 5149 }, { "epoch": 0.18190728909571768, "grad_norm": 1.8039860725402832, "learning_rate": 9.40711809301543e-06, "loss": 0.8438, "step": 5150 }, { "epoch": 0.18194261089942557, "grad_norm": 3.406069278717041, "learning_rate": 9.40684788870284e-06, "loss": 0.8421, "step": 5151 }, { "epoch": 0.18197793270313348, "grad_norm": 1.8839179277420044, "learning_rate": 9.406577626714251e-06, "loss": 0.8443, "step": 5152 }, { "epoch": 0.1820132545068414, "grad_norm": 1.5706562995910645, "learning_rate": 9.406307307053202e-06, "loss": 0.8426, "step": 5153 }, { "epoch": 0.1820485763105493, "grad_norm": 2.1277780532836914, "learning_rate": 9.406036929723232e-06, "loss": 0.8913, "step": 5154 }, { "epoch": 0.1820838981142572, "grad_norm": 1.5054930448532104, "learning_rate": 9.40576649472788e-06, "loss": 0.8441, "step": 5155 }, { "epoch": 0.18211921991796512, "grad_norm": 1.7776165008544922, "learning_rate": 9.405496002070681e-06, "loss": 0.8781, "step": 5156 }, { "epoch": 0.18215454172167303, "grad_norm": 1.6121037006378174, "learning_rate": 9.405225451755183e-06, "loss": 0.8672, "step": 5157 }, { "epoch": 0.1821898635253809, "grad_norm": 2.0627365112304688, "learning_rate": 9.40495484378492e-06, "loss": 0.8724, "step": 5158 }, { "epoch": 0.18222518532908882, "grad_norm": 1.678885817527771, "learning_rate": 9.404684178163434e-06, "loss": 0.8914, "step": 5159 }, { "epoch": 0.18226050713279673, "grad_norm": 1.5336464643478394, "learning_rate": 9.404413454894272e-06, "loss": 0.8327, "step": 5160 }, { "epoch": 0.18229582893650464, "grad_norm": 1.5980147123336792, "learning_rate": 9.404142673980974e-06, "loss": 0.8696, "step": 5161 }, { "epoch": 0.18233115074021256, "grad_norm": 1.627523422241211, "learning_rate": 9.403871835427085e-06, "loss": 0.8159, "step": 5162 }, { "epoch": 0.18236647254392047, "grad_norm": 1.5992028713226318, "learning_rate": 9.403600939236149e-06, "loss": 0.8411, "step": 5163 }, { "epoch": 0.18240179434762835, "grad_norm": 1.7498829364776611, "learning_rate": 9.403329985411711e-06, "loss": 0.8899, "step": 5164 }, { "epoch": 0.18243711615133626, "grad_norm": 1.594887614250183, "learning_rate": 9.403058973957319e-06, "loss": 0.845, "step": 5165 }, { "epoch": 0.18247243795504417, "grad_norm": 1.9541934728622437, "learning_rate": 9.40278790487652e-06, "loss": 0.8947, "step": 5166 }, { "epoch": 0.18250775975875208, "grad_norm": 1.7659263610839844, "learning_rate": 9.402516778172857e-06, "loss": 0.8448, "step": 5167 }, { "epoch": 0.18254308156246, "grad_norm": 1.6840721368789673, "learning_rate": 9.402245593849884e-06, "loss": 0.8524, "step": 5168 }, { "epoch": 0.1825784033661679, "grad_norm": 1.7000890970230103, "learning_rate": 9.401974351911149e-06, "loss": 0.8426, "step": 5169 }, { "epoch": 0.1826137251698758, "grad_norm": 1.518752932548523, "learning_rate": 9.4017030523602e-06, "loss": 0.8531, "step": 5170 }, { "epoch": 0.1826490469735837, "grad_norm": 1.9031016826629639, "learning_rate": 9.40143169520059e-06, "loss": 0.8674, "step": 5171 }, { "epoch": 0.1826843687772916, "grad_norm": 1.7726067304611206, "learning_rate": 9.401160280435868e-06, "loss": 0.8773, "step": 5172 }, { "epoch": 0.18271969058099952, "grad_norm": 1.6519330739974976, "learning_rate": 9.400888808069588e-06, "loss": 0.8333, "step": 5173 }, { "epoch": 0.18275501238470743, "grad_norm": 1.804125428199768, "learning_rate": 9.400617278105304e-06, "loss": 0.8849, "step": 5174 }, { "epoch": 0.18279033418841534, "grad_norm": 1.716661810874939, "learning_rate": 9.400345690546567e-06, "loss": 0.8476, "step": 5175 }, { "epoch": 0.18282565599212325, "grad_norm": 1.4604225158691406, "learning_rate": 9.400074045396931e-06, "loss": 0.8292, "step": 5176 }, { "epoch": 0.18286097779583113, "grad_norm": 2.1052443981170654, "learning_rate": 9.399802342659957e-06, "loss": 0.8715, "step": 5177 }, { "epoch": 0.18289629959953904, "grad_norm": 1.7224644422531128, "learning_rate": 9.399530582339194e-06, "loss": 0.8655, "step": 5178 }, { "epoch": 0.18293162140324695, "grad_norm": 1.7046494483947754, "learning_rate": 9.399258764438202e-06, "loss": 0.858, "step": 5179 }, { "epoch": 0.18296694320695486, "grad_norm": 1.5881673097610474, "learning_rate": 9.398986888960539e-06, "loss": 0.9009, "step": 5180 }, { "epoch": 0.18300226501066277, "grad_norm": 1.719137191772461, "learning_rate": 9.398714955909763e-06, "loss": 0.867, "step": 5181 }, { "epoch": 0.18303758681437068, "grad_norm": 1.8335540294647217, "learning_rate": 9.39844296528943e-06, "loss": 0.8916, "step": 5182 }, { "epoch": 0.1830729086180786, "grad_norm": 1.5618008375167847, "learning_rate": 9.398170917103105e-06, "loss": 0.8295, "step": 5183 }, { "epoch": 0.18310823042178648, "grad_norm": 1.860273003578186, "learning_rate": 9.397898811354344e-06, "loss": 0.8454, "step": 5184 }, { "epoch": 0.1831435522254944, "grad_norm": 1.8068360090255737, "learning_rate": 9.397626648046711e-06, "loss": 0.9092, "step": 5185 }, { "epoch": 0.1831788740292023, "grad_norm": 1.7209094762802124, "learning_rate": 9.397354427183767e-06, "loss": 0.8682, "step": 5186 }, { "epoch": 0.1832141958329102, "grad_norm": 1.7061899900436401, "learning_rate": 9.397082148769075e-06, "loss": 0.8545, "step": 5187 }, { "epoch": 0.18324951763661812, "grad_norm": 1.7831368446350098, "learning_rate": 9.396809812806198e-06, "loss": 0.8842, "step": 5188 }, { "epoch": 0.18328483944032603, "grad_norm": 1.7780455350875854, "learning_rate": 9.396537419298702e-06, "loss": 0.8817, "step": 5189 }, { "epoch": 0.1833201612440339, "grad_norm": 1.6866602897644043, "learning_rate": 9.39626496825015e-06, "loss": 0.8853, "step": 5190 }, { "epoch": 0.18335548304774182, "grad_norm": 1.7615915536880493, "learning_rate": 9.395992459664109e-06, "loss": 0.8683, "step": 5191 }, { "epoch": 0.18339080485144973, "grad_norm": 1.8762106895446777, "learning_rate": 9.395719893544144e-06, "loss": 0.8679, "step": 5192 }, { "epoch": 0.18342612665515765, "grad_norm": 1.7316035032272339, "learning_rate": 9.395447269893823e-06, "loss": 0.8755, "step": 5193 }, { "epoch": 0.18346144845886556, "grad_norm": 1.7368464469909668, "learning_rate": 9.395174588716716e-06, "loss": 0.8633, "step": 5194 }, { "epoch": 0.18349677026257347, "grad_norm": 1.6584242582321167, "learning_rate": 9.394901850016389e-06, "loss": 0.8598, "step": 5195 }, { "epoch": 0.18353209206628138, "grad_norm": 1.8060518503189087, "learning_rate": 9.394629053796412e-06, "loss": 0.8577, "step": 5196 }, { "epoch": 0.18356741386998926, "grad_norm": 1.7039192914962769, "learning_rate": 9.394356200060357e-06, "loss": 0.8516, "step": 5197 }, { "epoch": 0.18360273567369717, "grad_norm": 1.8521658182144165, "learning_rate": 9.394083288811795e-06, "loss": 0.8654, "step": 5198 }, { "epoch": 0.18363805747740508, "grad_norm": 1.836920976638794, "learning_rate": 9.393810320054295e-06, "loss": 0.8573, "step": 5199 }, { "epoch": 0.183673379281113, "grad_norm": 1.7308579683303833, "learning_rate": 9.393537293791433e-06, "loss": 0.8226, "step": 5200 }, { "epoch": 0.1837087010848209, "grad_norm": 1.9186614751815796, "learning_rate": 9.39326421002678e-06, "loss": 0.8908, "step": 5201 }, { "epoch": 0.1837440228885288, "grad_norm": 1.7666213512420654, "learning_rate": 9.392991068763912e-06, "loss": 0.8387, "step": 5202 }, { "epoch": 0.1837793446922367, "grad_norm": 1.9282667636871338, "learning_rate": 9.3927178700064e-06, "loss": 0.8633, "step": 5203 }, { "epoch": 0.1838146664959446, "grad_norm": 1.8180781602859497, "learning_rate": 9.392444613757824e-06, "loss": 0.8761, "step": 5204 }, { "epoch": 0.18384998829965252, "grad_norm": 1.9218626022338867, "learning_rate": 9.39217130002176e-06, "loss": 0.9101, "step": 5205 }, { "epoch": 0.18388531010336043, "grad_norm": 1.9756004810333252, "learning_rate": 9.391897928801784e-06, "loss": 0.8541, "step": 5206 }, { "epoch": 0.18392063190706834, "grad_norm": 1.8255867958068848, "learning_rate": 9.39162450010147e-06, "loss": 0.8765, "step": 5207 }, { "epoch": 0.18395595371077625, "grad_norm": 1.6870695352554321, "learning_rate": 9.3913510139244e-06, "loss": 0.863, "step": 5208 }, { "epoch": 0.18399127551448416, "grad_norm": 1.9198005199432373, "learning_rate": 9.391077470274157e-06, "loss": 0.8719, "step": 5209 }, { "epoch": 0.18402659731819204, "grad_norm": 1.7860902547836304, "learning_rate": 9.390803869154315e-06, "loss": 0.8514, "step": 5210 }, { "epoch": 0.18406191912189995, "grad_norm": 1.6989326477050781, "learning_rate": 9.390530210568458e-06, "loss": 0.8509, "step": 5211 }, { "epoch": 0.18409724092560786, "grad_norm": 1.7896803617477417, "learning_rate": 9.39025649452017e-06, "loss": 0.8906, "step": 5212 }, { "epoch": 0.18413256272931577, "grad_norm": 1.6735649108886719, "learning_rate": 9.389982721013026e-06, "loss": 0.8534, "step": 5213 }, { "epoch": 0.18416788453302368, "grad_norm": 1.7127010822296143, "learning_rate": 9.389708890050614e-06, "loss": 0.8717, "step": 5214 }, { "epoch": 0.1842032063367316, "grad_norm": 1.8641924858093262, "learning_rate": 9.389435001636518e-06, "loss": 0.8627, "step": 5215 }, { "epoch": 0.18423852814043948, "grad_norm": 1.6519532203674316, "learning_rate": 9.389161055774322e-06, "loss": 0.8468, "step": 5216 }, { "epoch": 0.1842738499441474, "grad_norm": 1.822336196899414, "learning_rate": 9.388887052467611e-06, "loss": 0.8621, "step": 5217 }, { "epoch": 0.1843091717478553, "grad_norm": 1.9484398365020752, "learning_rate": 9.388612991719972e-06, "loss": 0.8605, "step": 5218 }, { "epoch": 0.1843444935515632, "grad_norm": 1.6372277736663818, "learning_rate": 9.38833887353499e-06, "loss": 0.8433, "step": 5219 }, { "epoch": 0.18437981535527112, "grad_norm": 2.080605983734131, "learning_rate": 9.388064697916253e-06, "loss": 0.8783, "step": 5220 }, { "epoch": 0.18441513715897903, "grad_norm": 1.84473717212677, "learning_rate": 9.387790464867353e-06, "loss": 0.8639, "step": 5221 }, { "epoch": 0.18445045896268694, "grad_norm": 1.8305596113204956, "learning_rate": 9.387516174391873e-06, "loss": 0.8661, "step": 5222 }, { "epoch": 0.18448578076639482, "grad_norm": 1.7952015399932861, "learning_rate": 9.387241826493407e-06, "loss": 0.8417, "step": 5223 }, { "epoch": 0.18452110257010274, "grad_norm": 2.0520706176757812, "learning_rate": 9.386967421175545e-06, "loss": 0.8469, "step": 5224 }, { "epoch": 0.18455642437381065, "grad_norm": 1.627245545387268, "learning_rate": 9.386692958441879e-06, "loss": 0.8327, "step": 5225 }, { "epoch": 0.18459174617751856, "grad_norm": 1.682978868484497, "learning_rate": 9.386418438296e-06, "loss": 0.8525, "step": 5226 }, { "epoch": 0.18462706798122647, "grad_norm": 1.834741234779358, "learning_rate": 9.3861438607415e-06, "loss": 0.8939, "step": 5227 }, { "epoch": 0.18466238978493438, "grad_norm": 1.6248526573181152, "learning_rate": 9.385869225781975e-06, "loss": 0.8213, "step": 5228 }, { "epoch": 0.18469771158864226, "grad_norm": 1.5559256076812744, "learning_rate": 9.385594533421017e-06, "loss": 0.8173, "step": 5229 }, { "epoch": 0.18473303339235017, "grad_norm": 1.6551342010498047, "learning_rate": 9.385319783662221e-06, "loss": 0.8299, "step": 5230 }, { "epoch": 0.18476835519605808, "grad_norm": 1.7521159648895264, "learning_rate": 9.385044976509185e-06, "loss": 0.8732, "step": 5231 }, { "epoch": 0.184803676999766, "grad_norm": 1.70657479763031, "learning_rate": 9.384770111965506e-06, "loss": 0.8453, "step": 5232 }, { "epoch": 0.1848389988034739, "grad_norm": 1.6879884004592896, "learning_rate": 9.384495190034779e-06, "loss": 0.8603, "step": 5233 }, { "epoch": 0.1848743206071818, "grad_norm": 1.720996618270874, "learning_rate": 9.384220210720603e-06, "loss": 0.8575, "step": 5234 }, { "epoch": 0.18490964241088972, "grad_norm": 1.6928333044052124, "learning_rate": 9.383945174026576e-06, "loss": 0.8635, "step": 5235 }, { "epoch": 0.1849449642145976, "grad_norm": 1.8277778625488281, "learning_rate": 9.383670079956301e-06, "loss": 0.8981, "step": 5236 }, { "epoch": 0.18498028601830552, "grad_norm": 1.6490654945373535, "learning_rate": 9.383394928513374e-06, "loss": 0.8436, "step": 5237 }, { "epoch": 0.18501560782201343, "grad_norm": 1.6174367666244507, "learning_rate": 9.383119719701398e-06, "loss": 0.8132, "step": 5238 }, { "epoch": 0.18505092962572134, "grad_norm": 2.089996099472046, "learning_rate": 9.382844453523978e-06, "loss": 0.8613, "step": 5239 }, { "epoch": 0.18508625142942925, "grad_norm": 1.8314728736877441, "learning_rate": 9.382569129984711e-06, "loss": 0.8572, "step": 5240 }, { "epoch": 0.18512157323313716, "grad_norm": 1.7114408016204834, "learning_rate": 9.382293749087203e-06, "loss": 0.8519, "step": 5241 }, { "epoch": 0.18515689503684504, "grad_norm": 1.653923749923706, "learning_rate": 9.382018310835059e-06, "loss": 0.8849, "step": 5242 }, { "epoch": 0.18519221684055295, "grad_norm": 1.8732681274414062, "learning_rate": 9.381742815231885e-06, "loss": 0.836, "step": 5243 }, { "epoch": 0.18522753864426086, "grad_norm": 9.141631126403809, "learning_rate": 9.381467262281282e-06, "loss": 0.8513, "step": 5244 }, { "epoch": 0.18526286044796877, "grad_norm": 3.0327348709106445, "learning_rate": 9.38119165198686e-06, "loss": 0.8534, "step": 5245 }, { "epoch": 0.18529818225167669, "grad_norm": 1.8264579772949219, "learning_rate": 9.380915984352224e-06, "loss": 0.8532, "step": 5246 }, { "epoch": 0.1853335040553846, "grad_norm": 1.7706953287124634, "learning_rate": 9.380640259380984e-06, "loss": 0.8924, "step": 5247 }, { "epoch": 0.1853688258590925, "grad_norm": 2.217600107192993, "learning_rate": 9.38036447707675e-06, "loss": 0.923, "step": 5248 }, { "epoch": 0.1854041476628004, "grad_norm": 2.742358446121216, "learning_rate": 9.380088637443126e-06, "loss": 0.8651, "step": 5249 }, { "epoch": 0.1854394694665083, "grad_norm": 1.8235151767730713, "learning_rate": 9.379812740483727e-06, "loss": 0.8931, "step": 5250 }, { "epoch": 0.1854747912702162, "grad_norm": 1.8667255640029907, "learning_rate": 9.379536786202162e-06, "loss": 0.8947, "step": 5251 }, { "epoch": 0.18551011307392412, "grad_norm": 1.770323634147644, "learning_rate": 9.379260774602041e-06, "loss": 0.8517, "step": 5252 }, { "epoch": 0.18554543487763203, "grad_norm": 1.9395910501480103, "learning_rate": 9.37898470568698e-06, "loss": 0.8601, "step": 5253 }, { "epoch": 0.18558075668133994, "grad_norm": 1.7460170984268188, "learning_rate": 9.37870857946059e-06, "loss": 0.8522, "step": 5254 }, { "epoch": 0.18561607848504783, "grad_norm": 2.014665365219116, "learning_rate": 9.378432395926485e-06, "loss": 0.875, "step": 5255 }, { "epoch": 0.18565140028875574, "grad_norm": 2.055966854095459, "learning_rate": 9.37815615508828e-06, "loss": 0.8642, "step": 5256 }, { "epoch": 0.18568672209246365, "grad_norm": 1.7562838792800903, "learning_rate": 9.37787985694959e-06, "loss": 0.829, "step": 5257 }, { "epoch": 0.18572204389617156, "grad_norm": 2.1034860610961914, "learning_rate": 9.377603501514032e-06, "loss": 0.8515, "step": 5258 }, { "epoch": 0.18575736569987947, "grad_norm": 2.0548243522644043, "learning_rate": 9.377327088785222e-06, "loss": 0.8937, "step": 5259 }, { "epoch": 0.18579268750358738, "grad_norm": 2.0674006938934326, "learning_rate": 9.377050618766777e-06, "loss": 0.8849, "step": 5260 }, { "epoch": 0.1858280093072953, "grad_norm": 1.9588865041732788, "learning_rate": 9.376774091462318e-06, "loss": 0.8674, "step": 5261 }, { "epoch": 0.18586333111100317, "grad_norm": 1.723175048828125, "learning_rate": 9.37649750687546e-06, "loss": 0.8696, "step": 5262 }, { "epoch": 0.18589865291471108, "grad_norm": 1.8732566833496094, "learning_rate": 9.376220865009827e-06, "loss": 0.8299, "step": 5263 }, { "epoch": 0.185933974718419, "grad_norm": 2.9233551025390625, "learning_rate": 9.375944165869037e-06, "loss": 0.8295, "step": 5264 }, { "epoch": 0.1859692965221269, "grad_norm": 1.8952547311782837, "learning_rate": 9.375667409456715e-06, "loss": 0.8741, "step": 5265 }, { "epoch": 0.18600461832583481, "grad_norm": 1.7875099182128906, "learning_rate": 9.375390595776478e-06, "loss": 0.8451, "step": 5266 }, { "epoch": 0.18603994012954272, "grad_norm": 2.0742955207824707, "learning_rate": 9.375113724831953e-06, "loss": 0.8712, "step": 5267 }, { "epoch": 0.18607526193325064, "grad_norm": 1.784383773803711, "learning_rate": 9.37483679662676e-06, "loss": 0.8127, "step": 5268 }, { "epoch": 0.18611058373695852, "grad_norm": 2.0300168991088867, "learning_rate": 9.374559811164525e-06, "loss": 0.8801, "step": 5269 }, { "epoch": 0.18614590554066643, "grad_norm": 1.7623207569122314, "learning_rate": 9.374282768448876e-06, "loss": 0.8471, "step": 5270 }, { "epoch": 0.18618122734437434, "grad_norm": 1.902241826057434, "learning_rate": 9.374005668483434e-06, "loss": 0.8671, "step": 5271 }, { "epoch": 0.18621654914808225, "grad_norm": 1.9137815237045288, "learning_rate": 9.373728511271831e-06, "loss": 0.8802, "step": 5272 }, { "epoch": 0.18625187095179016, "grad_norm": 1.7420819997787476, "learning_rate": 9.373451296817689e-06, "loss": 0.8389, "step": 5273 }, { "epoch": 0.18628719275549807, "grad_norm": 2.0566956996917725, "learning_rate": 9.37317402512464e-06, "loss": 0.8399, "step": 5274 }, { "epoch": 0.18632251455920595, "grad_norm": 1.6168004274368286, "learning_rate": 9.37289669619631e-06, "loss": 0.8583, "step": 5275 }, { "epoch": 0.18635783636291386, "grad_norm": 2.1141223907470703, "learning_rate": 9.37261931003633e-06, "loss": 0.8548, "step": 5276 }, { "epoch": 0.18639315816662178, "grad_norm": 2.035076379776001, "learning_rate": 9.372341866648332e-06, "loss": 0.8636, "step": 5277 }, { "epoch": 0.18642847997032969, "grad_norm": 1.7367315292358398, "learning_rate": 9.372064366035945e-06, "loss": 0.831, "step": 5278 }, { "epoch": 0.1864638017740376, "grad_norm": 2.961599588394165, "learning_rate": 9.371786808202801e-06, "loss": 0.8556, "step": 5279 }, { "epoch": 0.1864991235777455, "grad_norm": 2.0424017906188965, "learning_rate": 9.371509193152534e-06, "loss": 0.8951, "step": 5280 }, { "epoch": 0.18653444538145342, "grad_norm": 1.7725573778152466, "learning_rate": 9.371231520888775e-06, "loss": 0.8912, "step": 5281 }, { "epoch": 0.1865697671851613, "grad_norm": 2.167496919631958, "learning_rate": 9.37095379141516e-06, "loss": 0.8257, "step": 5282 }, { "epoch": 0.1866050889888692, "grad_norm": 2.3638737201690674, "learning_rate": 9.370676004735323e-06, "loss": 0.8829, "step": 5283 }, { "epoch": 0.18664041079257712, "grad_norm": 1.8507100343704224, "learning_rate": 9.370398160852902e-06, "loss": 0.8618, "step": 5284 }, { "epoch": 0.18667573259628503, "grad_norm": 1.8958067893981934, "learning_rate": 9.370120259771529e-06, "loss": 0.8319, "step": 5285 }, { "epoch": 0.18671105439999294, "grad_norm": 1.7674096822738647, "learning_rate": 9.369842301494845e-06, "loss": 0.8506, "step": 5286 }, { "epoch": 0.18674637620370085, "grad_norm": 1.8565700054168701, "learning_rate": 9.369564286026486e-06, "loss": 0.8471, "step": 5287 }, { "epoch": 0.18678169800740874, "grad_norm": 1.9833004474639893, "learning_rate": 9.369286213370092e-06, "loss": 0.8612, "step": 5288 }, { "epoch": 0.18681701981111665, "grad_norm": 1.7774025201797485, "learning_rate": 9.3690080835293e-06, "loss": 0.8499, "step": 5289 }, { "epoch": 0.18685234161482456, "grad_norm": 1.8483072519302368, "learning_rate": 9.36872989650775e-06, "loss": 0.8219, "step": 5290 }, { "epoch": 0.18688766341853247, "grad_norm": 1.6591824293136597, "learning_rate": 9.368451652309088e-06, "loss": 0.8599, "step": 5291 }, { "epoch": 0.18692298522224038, "grad_norm": 1.7686578035354614, "learning_rate": 9.36817335093695e-06, "loss": 0.8722, "step": 5292 }, { "epoch": 0.1869583070259483, "grad_norm": 1.8057444095611572, "learning_rate": 9.367894992394981e-06, "loss": 0.859, "step": 5293 }, { "epoch": 0.1869936288296562, "grad_norm": 2.0998165607452393, "learning_rate": 9.367616576686822e-06, "loss": 0.8826, "step": 5294 }, { "epoch": 0.18702895063336408, "grad_norm": 3.771636486053467, "learning_rate": 9.367338103816119e-06, "loss": 0.8555, "step": 5295 }, { "epoch": 0.187064272437072, "grad_norm": 1.8249695301055908, "learning_rate": 9.367059573786517e-06, "loss": 0.8554, "step": 5296 }, { "epoch": 0.1870995942407799, "grad_norm": 2.0113914012908936, "learning_rate": 9.36678098660166e-06, "loss": 0.8789, "step": 5297 }, { "epoch": 0.18713491604448781, "grad_norm": 1.73661470413208, "learning_rate": 9.366502342265192e-06, "loss": 0.842, "step": 5298 }, { "epoch": 0.18717023784819573, "grad_norm": 1.7476963996887207, "learning_rate": 9.366223640780765e-06, "loss": 0.8323, "step": 5299 }, { "epoch": 0.18720555965190364, "grad_norm": 2.1324448585510254, "learning_rate": 9.365944882152022e-06, "loss": 0.8673, "step": 5300 }, { "epoch": 0.18724088145561152, "grad_norm": 1.9797779321670532, "learning_rate": 9.365666066382614e-06, "loss": 0.8306, "step": 5301 }, { "epoch": 0.18727620325931943, "grad_norm": 3.227484941482544, "learning_rate": 9.365387193476187e-06, "loss": 0.8644, "step": 5302 }, { "epoch": 0.18731152506302734, "grad_norm": 1.8430067300796509, "learning_rate": 9.365108263436396e-06, "loss": 0.8729, "step": 5303 }, { "epoch": 0.18734684686673525, "grad_norm": 1.7852836847305298, "learning_rate": 9.364829276266887e-06, "loss": 0.8939, "step": 5304 }, { "epoch": 0.18738216867044316, "grad_norm": 1.6546438932418823, "learning_rate": 9.364550231971314e-06, "loss": 0.9028, "step": 5305 }, { "epoch": 0.18741749047415107, "grad_norm": 1.926039695739746, "learning_rate": 9.364271130553327e-06, "loss": 0.8925, "step": 5306 }, { "epoch": 0.18745281227785898, "grad_norm": 2.089923620223999, "learning_rate": 9.36399197201658e-06, "loss": 0.8756, "step": 5307 }, { "epoch": 0.18748813408156687, "grad_norm": 1.9458496570587158, "learning_rate": 9.363712756364727e-06, "loss": 0.8682, "step": 5308 }, { "epoch": 0.18752345588527478, "grad_norm": 1.5696077346801758, "learning_rate": 9.363433483601421e-06, "loss": 0.8595, "step": 5309 }, { "epoch": 0.1875587776889827, "grad_norm": 1.7485469579696655, "learning_rate": 9.363154153730318e-06, "loss": 0.8691, "step": 5310 }, { "epoch": 0.1875940994926906, "grad_norm": 1.9032127857208252, "learning_rate": 9.362874766755075e-06, "loss": 0.8706, "step": 5311 }, { "epoch": 0.1876294212963985, "grad_norm": 1.8487317562103271, "learning_rate": 9.362595322679345e-06, "loss": 0.8492, "step": 5312 }, { "epoch": 0.18766474310010642, "grad_norm": 1.6262726783752441, "learning_rate": 9.362315821506789e-06, "loss": 0.8608, "step": 5313 }, { "epoch": 0.1877000649038143, "grad_norm": 1.7907501459121704, "learning_rate": 9.362036263241063e-06, "loss": 0.8827, "step": 5314 }, { "epoch": 0.1877353867075222, "grad_norm": 1.7937672138214111, "learning_rate": 9.361756647885825e-06, "loss": 0.847, "step": 5315 }, { "epoch": 0.18777070851123012, "grad_norm": 2.2236876487731934, "learning_rate": 9.361476975444737e-06, "loss": 0.8363, "step": 5316 }, { "epoch": 0.18780603031493803, "grad_norm": 1.9484981298446655, "learning_rate": 9.361197245921459e-06, "loss": 0.8756, "step": 5317 }, { "epoch": 0.18784135211864594, "grad_norm": 1.8108786344528198, "learning_rate": 9.360917459319652e-06, "loss": 0.8756, "step": 5318 }, { "epoch": 0.18787667392235385, "grad_norm": 1.80105721950531, "learning_rate": 9.360637615642976e-06, "loss": 0.8367, "step": 5319 }, { "epoch": 0.18791199572606176, "grad_norm": 1.875434160232544, "learning_rate": 9.360357714895093e-06, "loss": 0.8452, "step": 5320 }, { "epoch": 0.18794731752976965, "grad_norm": 1.94161057472229, "learning_rate": 9.36007775707967e-06, "loss": 0.8311, "step": 5321 }, { "epoch": 0.18798263933347756, "grad_norm": 1.8131545782089233, "learning_rate": 9.359797742200367e-06, "loss": 0.8424, "step": 5322 }, { "epoch": 0.18801796113718547, "grad_norm": 1.9008136987686157, "learning_rate": 9.359517670260853e-06, "loss": 0.9019, "step": 5323 }, { "epoch": 0.18805328294089338, "grad_norm": 1.837764024734497, "learning_rate": 9.35923754126479e-06, "loss": 0.9169, "step": 5324 }, { "epoch": 0.1880886047446013, "grad_norm": 1.7721831798553467, "learning_rate": 9.358957355215845e-06, "loss": 0.8001, "step": 5325 }, { "epoch": 0.1881239265483092, "grad_norm": 1.9314749240875244, "learning_rate": 9.358677112117687e-06, "loss": 0.8474, "step": 5326 }, { "epoch": 0.18815924835201708, "grad_norm": 1.66409432888031, "learning_rate": 9.358396811973982e-06, "loss": 0.8462, "step": 5327 }, { "epoch": 0.188194570155725, "grad_norm": 1.9107731580734253, "learning_rate": 9.358116454788398e-06, "loss": 0.866, "step": 5328 }, { "epoch": 0.1882298919594329, "grad_norm": 1.6460750102996826, "learning_rate": 9.357836040564603e-06, "loss": 0.8338, "step": 5329 }, { "epoch": 0.18826521376314082, "grad_norm": 1.7209705114364624, "learning_rate": 9.357555569306274e-06, "loss": 0.8133, "step": 5330 }, { "epoch": 0.18830053556684873, "grad_norm": 1.7617781162261963, "learning_rate": 9.357275041017073e-06, "loss": 0.8424, "step": 5331 }, { "epoch": 0.18833585737055664, "grad_norm": 1.8367904424667358, "learning_rate": 9.356994455700677e-06, "loss": 0.8444, "step": 5332 }, { "epoch": 0.18837117917426455, "grad_norm": 2.0959525108337402, "learning_rate": 9.356713813360755e-06, "loss": 0.8674, "step": 5333 }, { "epoch": 0.18840650097797243, "grad_norm": 2.0785720348358154, "learning_rate": 9.356433114000983e-06, "loss": 0.8637, "step": 5334 }, { "epoch": 0.18844182278168034, "grad_norm": 1.7765508890151978, "learning_rate": 9.356152357625034e-06, "loss": 0.8373, "step": 5335 }, { "epoch": 0.18847714458538825, "grad_norm": 1.6874064207077026, "learning_rate": 9.35587154423658e-06, "loss": 0.8162, "step": 5336 }, { "epoch": 0.18851246638909616, "grad_norm": 1.8341434001922607, "learning_rate": 9.355590673839299e-06, "loss": 0.8764, "step": 5337 }, { "epoch": 0.18854778819280407, "grad_norm": 1.7574329376220703, "learning_rate": 9.355309746436866e-06, "loss": 0.8587, "step": 5338 }, { "epoch": 0.18858310999651198, "grad_norm": 1.5701243877410889, "learning_rate": 9.355028762032958e-06, "loss": 0.8193, "step": 5339 }, { "epoch": 0.18861843180021987, "grad_norm": 1.8134185075759888, "learning_rate": 9.354747720631253e-06, "loss": 0.8596, "step": 5340 }, { "epoch": 0.18865375360392778, "grad_norm": 1.708284616470337, "learning_rate": 9.354466622235426e-06, "loss": 0.8794, "step": 5341 }, { "epoch": 0.1886890754076357, "grad_norm": 1.7840421199798584, "learning_rate": 9.354185466849161e-06, "loss": 0.8509, "step": 5342 }, { "epoch": 0.1887243972113436, "grad_norm": 1.9604235887527466, "learning_rate": 9.353904254476134e-06, "loss": 0.8833, "step": 5343 }, { "epoch": 0.1887597190150515, "grad_norm": 1.8309955596923828, "learning_rate": 9.353622985120026e-06, "loss": 0.8644, "step": 5344 }, { "epoch": 0.18879504081875942, "grad_norm": 2.0394251346588135, "learning_rate": 9.353341658784518e-06, "loss": 0.8545, "step": 5345 }, { "epoch": 0.18883036262246733, "grad_norm": 1.7144182920455933, "learning_rate": 9.353060275473294e-06, "loss": 0.8395, "step": 5346 }, { "epoch": 0.1888656844261752, "grad_norm": 1.8867305517196655, "learning_rate": 9.352778835190037e-06, "loss": 0.8909, "step": 5347 }, { "epoch": 0.18890100622988312, "grad_norm": 1.919466257095337, "learning_rate": 9.352497337938427e-06, "loss": 0.8811, "step": 5348 }, { "epoch": 0.18893632803359103, "grad_norm": 1.9755891561508179, "learning_rate": 9.35221578372215e-06, "loss": 0.8539, "step": 5349 }, { "epoch": 0.18897164983729894, "grad_norm": 1.9430011510849, "learning_rate": 9.35193417254489e-06, "loss": 0.8477, "step": 5350 }, { "epoch": 0.18900697164100685, "grad_norm": 1.920128345489502, "learning_rate": 9.351652504410335e-06, "loss": 0.8488, "step": 5351 }, { "epoch": 0.18904229344471477, "grad_norm": 1.8044220209121704, "learning_rate": 9.35137077932217e-06, "loss": 0.8819, "step": 5352 }, { "epoch": 0.18907761524842265, "grad_norm": 1.7341035604476929, "learning_rate": 9.351088997284081e-06, "loss": 0.863, "step": 5353 }, { "epoch": 0.18911293705213056, "grad_norm": 2.0073790550231934, "learning_rate": 9.35080715829976e-06, "loss": 0.8661, "step": 5354 }, { "epoch": 0.18914825885583847, "grad_norm": 1.781086802482605, "learning_rate": 9.350525262372889e-06, "loss": 0.8764, "step": 5355 }, { "epoch": 0.18918358065954638, "grad_norm": 1.7820210456848145, "learning_rate": 9.350243309507164e-06, "loss": 0.8744, "step": 5356 }, { "epoch": 0.1892189024632543, "grad_norm": 1.700703501701355, "learning_rate": 9.349961299706271e-06, "loss": 0.8342, "step": 5357 }, { "epoch": 0.1892542242669622, "grad_norm": 1.747257947921753, "learning_rate": 9.349679232973902e-06, "loss": 0.8645, "step": 5358 }, { "epoch": 0.1892895460706701, "grad_norm": 1.7843921184539795, "learning_rate": 9.34939710931375e-06, "loss": 0.9006, "step": 5359 }, { "epoch": 0.189324867874378, "grad_norm": 1.8524035215377808, "learning_rate": 9.349114928729505e-06, "loss": 0.854, "step": 5360 }, { "epoch": 0.1893601896780859, "grad_norm": 2.193591833114624, "learning_rate": 9.348832691224862e-06, "loss": 0.8554, "step": 5361 }, { "epoch": 0.18939551148179382, "grad_norm": 1.7240045070648193, "learning_rate": 9.348550396803515e-06, "loss": 0.8592, "step": 5362 }, { "epoch": 0.18943083328550173, "grad_norm": 1.8001179695129395, "learning_rate": 9.348268045469158e-06, "loss": 0.8625, "step": 5363 }, { "epoch": 0.18946615508920964, "grad_norm": 1.9241067171096802, "learning_rate": 9.347985637225485e-06, "loss": 0.8322, "step": 5364 }, { "epoch": 0.18950147689291755, "grad_norm": 1.9645988941192627, "learning_rate": 9.347703172076194e-06, "loss": 0.8578, "step": 5365 }, { "epoch": 0.18953679869662543, "grad_norm": 1.6498926877975464, "learning_rate": 9.34742065002498e-06, "loss": 0.828, "step": 5366 }, { "epoch": 0.18957212050033334, "grad_norm": 2.016541004180908, "learning_rate": 9.347138071075544e-06, "loss": 0.8877, "step": 5367 }, { "epoch": 0.18960744230404125, "grad_norm": 1.898903727531433, "learning_rate": 9.346855435231578e-06, "loss": 0.8848, "step": 5368 }, { "epoch": 0.18964276410774916, "grad_norm": 1.743855357170105, "learning_rate": 9.346572742496789e-06, "loss": 0.8426, "step": 5369 }, { "epoch": 0.18967808591145707, "grad_norm": 1.7324261665344238, "learning_rate": 9.346289992874872e-06, "loss": 0.8414, "step": 5370 }, { "epoch": 0.18971340771516498, "grad_norm": 1.8770751953125, "learning_rate": 9.346007186369528e-06, "loss": 0.8568, "step": 5371 }, { "epoch": 0.1897487295188729, "grad_norm": 1.7436271905899048, "learning_rate": 9.345724322984459e-06, "loss": 0.8668, "step": 5372 }, { "epoch": 0.18978405132258078, "grad_norm": 1.6429829597473145, "learning_rate": 9.345441402723367e-06, "loss": 0.8479, "step": 5373 }, { "epoch": 0.1898193731262887, "grad_norm": 1.6773194074630737, "learning_rate": 9.345158425589954e-06, "loss": 0.8285, "step": 5374 }, { "epoch": 0.1898546949299966, "grad_norm": 1.8505908250808716, "learning_rate": 9.344875391587928e-06, "loss": 0.8837, "step": 5375 }, { "epoch": 0.1898900167337045, "grad_norm": 1.6631712913513184, "learning_rate": 9.344592300720988e-06, "loss": 0.8326, "step": 5376 }, { "epoch": 0.18992533853741242, "grad_norm": 1.8817157745361328, "learning_rate": 9.344309152992838e-06, "loss": 0.8762, "step": 5377 }, { "epoch": 0.18996066034112033, "grad_norm": 1.8272348642349243, "learning_rate": 9.344025948407188e-06, "loss": 0.8765, "step": 5378 }, { "epoch": 0.1899959821448282, "grad_norm": 1.7156836986541748, "learning_rate": 9.343742686967742e-06, "loss": 0.8855, "step": 5379 }, { "epoch": 0.19003130394853612, "grad_norm": 1.6639598608016968, "learning_rate": 9.343459368678211e-06, "loss": 0.8327, "step": 5380 }, { "epoch": 0.19006662575224403, "grad_norm": 1.6752293109893799, "learning_rate": 9.343175993542299e-06, "loss": 0.8268, "step": 5381 }, { "epoch": 0.19010194755595194, "grad_norm": 2.109898328781128, "learning_rate": 9.342892561563714e-06, "loss": 0.8753, "step": 5382 }, { "epoch": 0.19013726935965986, "grad_norm": 1.9209094047546387, "learning_rate": 9.34260907274617e-06, "loss": 0.9061, "step": 5383 }, { "epoch": 0.19017259116336777, "grad_norm": 1.6525909900665283, "learning_rate": 9.342325527093374e-06, "loss": 0.8616, "step": 5384 }, { "epoch": 0.19020791296707568, "grad_norm": 1.6304678916931152, "learning_rate": 9.342041924609038e-06, "loss": 0.8373, "step": 5385 }, { "epoch": 0.19024323477078356, "grad_norm": 1.76826012134552, "learning_rate": 9.341758265296873e-06, "loss": 0.8368, "step": 5386 }, { "epoch": 0.19027855657449147, "grad_norm": 1.7272121906280518, "learning_rate": 9.341474549160592e-06, "loss": 0.8472, "step": 5387 }, { "epoch": 0.19031387837819938, "grad_norm": 1.9439969062805176, "learning_rate": 9.341190776203909e-06, "loss": 0.8452, "step": 5388 }, { "epoch": 0.1903492001819073, "grad_norm": 1.9989129304885864, "learning_rate": 9.340906946430536e-06, "loss": 0.9233, "step": 5389 }, { "epoch": 0.1903845219856152, "grad_norm": 1.7828094959259033, "learning_rate": 9.34062305984419e-06, "loss": 0.8435, "step": 5390 }, { "epoch": 0.1904198437893231, "grad_norm": 1.7745591402053833, "learning_rate": 9.340339116448585e-06, "loss": 0.8485, "step": 5391 }, { "epoch": 0.190455165593031, "grad_norm": 1.800696849822998, "learning_rate": 9.340055116247438e-06, "loss": 0.8848, "step": 5392 }, { "epoch": 0.1904904873967389, "grad_norm": 1.1282365322113037, "learning_rate": 9.339771059244463e-06, "loss": 0.6124, "step": 5393 }, { "epoch": 0.19052580920044682, "grad_norm": 2.0460355281829834, "learning_rate": 9.339486945443384e-06, "loss": 0.8802, "step": 5394 }, { "epoch": 0.19056113100415473, "grad_norm": 1.9194047451019287, "learning_rate": 9.339202774847913e-06, "loss": 0.863, "step": 5395 }, { "epoch": 0.19059645280786264, "grad_norm": 1.9550291299819946, "learning_rate": 9.338918547461772e-06, "loss": 0.9115, "step": 5396 }, { "epoch": 0.19063177461157055, "grad_norm": 1.9761735200881958, "learning_rate": 9.33863426328868e-06, "loss": 0.8811, "step": 5397 }, { "epoch": 0.19066709641527846, "grad_norm": 2.017287015914917, "learning_rate": 9.33834992233236e-06, "loss": 0.8635, "step": 5398 }, { "epoch": 0.19070241821898634, "grad_norm": 0.9047868251800537, "learning_rate": 9.338065524596531e-06, "loss": 0.5874, "step": 5399 }, { "epoch": 0.19073774002269425, "grad_norm": 1.6048102378845215, "learning_rate": 9.337781070084916e-06, "loss": 0.8393, "step": 5400 }, { "epoch": 0.19077306182640216, "grad_norm": 1.8732774257659912, "learning_rate": 9.337496558801238e-06, "loss": 0.8545, "step": 5401 }, { "epoch": 0.19080838363011007, "grad_norm": 1.7065823078155518, "learning_rate": 9.33721199074922e-06, "loss": 0.8648, "step": 5402 }, { "epoch": 0.19084370543381798, "grad_norm": 1.844674825668335, "learning_rate": 9.336927365932586e-06, "loss": 0.8843, "step": 5403 }, { "epoch": 0.1908790272375259, "grad_norm": 1.6934292316436768, "learning_rate": 9.336642684355063e-06, "loss": 0.8653, "step": 5404 }, { "epoch": 0.19091434904123378, "grad_norm": 1.8298158645629883, "learning_rate": 9.336357946020375e-06, "loss": 0.8448, "step": 5405 }, { "epoch": 0.1909496708449417, "grad_norm": 1.6887922286987305, "learning_rate": 9.336073150932252e-06, "loss": 0.8808, "step": 5406 }, { "epoch": 0.1909849926486496, "grad_norm": 1.6545112133026123, "learning_rate": 9.335788299094417e-06, "loss": 0.8711, "step": 5407 }, { "epoch": 0.1910203144523575, "grad_norm": 1.6880695819854736, "learning_rate": 9.335503390510599e-06, "loss": 0.8927, "step": 5408 }, { "epoch": 0.19105563625606542, "grad_norm": 1.5898951292037964, "learning_rate": 9.335218425184526e-06, "loss": 0.8284, "step": 5409 }, { "epoch": 0.19109095805977333, "grad_norm": 1.6744383573532104, "learning_rate": 9.334933403119932e-06, "loss": 0.8776, "step": 5410 }, { "epoch": 0.19112627986348124, "grad_norm": 1.686209797859192, "learning_rate": 9.334648324320543e-06, "loss": 0.8645, "step": 5411 }, { "epoch": 0.19116160166718912, "grad_norm": 1.6115899085998535, "learning_rate": 9.334363188790094e-06, "loss": 0.8508, "step": 5412 }, { "epoch": 0.19119692347089703, "grad_norm": 7.646852493286133, "learning_rate": 9.33407799653231e-06, "loss": 0.8414, "step": 5413 }, { "epoch": 0.19123224527460495, "grad_norm": 1.7601407766342163, "learning_rate": 9.333792747550933e-06, "loss": 0.8708, "step": 5414 }, { "epoch": 0.19126756707831286, "grad_norm": 1.852976679801941, "learning_rate": 9.333507441849687e-06, "loss": 0.8149, "step": 5415 }, { "epoch": 0.19130288888202077, "grad_norm": 1.7501834630966187, "learning_rate": 9.333222079432313e-06, "loss": 0.8835, "step": 5416 }, { "epoch": 0.19133821068572868, "grad_norm": 1.9241498708724976, "learning_rate": 9.332936660302542e-06, "loss": 0.8654, "step": 5417 }, { "epoch": 0.19137353248943656, "grad_norm": 2.118453025817871, "learning_rate": 9.33265118446411e-06, "loss": 0.8533, "step": 5418 }, { "epoch": 0.19140885429314447, "grad_norm": 1.6901229619979858, "learning_rate": 9.332365651920754e-06, "loss": 0.8611, "step": 5419 }, { "epoch": 0.19144417609685238, "grad_norm": 1.9345974922180176, "learning_rate": 9.332080062676212e-06, "loss": 0.8463, "step": 5420 }, { "epoch": 0.1914794979005603, "grad_norm": 1.7564446926116943, "learning_rate": 9.33179441673422e-06, "loss": 0.8134, "step": 5421 }, { "epoch": 0.1915148197042682, "grad_norm": 2.014399528503418, "learning_rate": 9.331508714098515e-06, "loss": 0.8582, "step": 5422 }, { "epoch": 0.1915501415079761, "grad_norm": 1.801261305809021, "learning_rate": 9.33122295477284e-06, "loss": 0.877, "step": 5423 }, { "epoch": 0.19158546331168402, "grad_norm": 1.599765419960022, "learning_rate": 9.330937138760931e-06, "loss": 0.8499, "step": 5424 }, { "epoch": 0.1916207851153919, "grad_norm": 1.731366515159607, "learning_rate": 9.330651266066533e-06, "loss": 0.8549, "step": 5425 }, { "epoch": 0.19165610691909982, "grad_norm": 1.9002562761306763, "learning_rate": 9.330365336693386e-06, "loss": 0.8352, "step": 5426 }, { "epoch": 0.19169142872280773, "grad_norm": 1.6247797012329102, "learning_rate": 9.33007935064523e-06, "loss": 0.8289, "step": 5427 }, { "epoch": 0.19172675052651564, "grad_norm": 1.7228271961212158, "learning_rate": 9.32979330792581e-06, "loss": 0.8602, "step": 5428 }, { "epoch": 0.19176207233022355, "grad_norm": 1.8362452983856201, "learning_rate": 9.329507208538868e-06, "loss": 0.8495, "step": 5429 }, { "epoch": 0.19179739413393146, "grad_norm": 1.8757590055465698, "learning_rate": 9.32922105248815e-06, "loss": 0.8633, "step": 5430 }, { "epoch": 0.19183271593763934, "grad_norm": 1.7764837741851807, "learning_rate": 9.328934839777401e-06, "loss": 0.8365, "step": 5431 }, { "epoch": 0.19186803774134725, "grad_norm": 1.7889131307601929, "learning_rate": 9.328648570410366e-06, "loss": 0.8608, "step": 5432 }, { "epoch": 0.19190335954505516, "grad_norm": 1.9953478574752808, "learning_rate": 9.328362244390794e-06, "loss": 0.896, "step": 5433 }, { "epoch": 0.19193868134876307, "grad_norm": 2.3475520610809326, "learning_rate": 9.32807586172243e-06, "loss": 0.8325, "step": 5434 }, { "epoch": 0.19197400315247098, "grad_norm": 1.9949179887771606, "learning_rate": 9.327789422409021e-06, "loss": 0.8504, "step": 5435 }, { "epoch": 0.1920093249561789, "grad_norm": 1.8212412595748901, "learning_rate": 9.327502926454318e-06, "loss": 0.848, "step": 5436 }, { "epoch": 0.1920446467598868, "grad_norm": 2.0745019912719727, "learning_rate": 9.32721637386207e-06, "loss": 0.8291, "step": 5437 }, { "epoch": 0.1920799685635947, "grad_norm": 2.0274081230163574, "learning_rate": 9.32692976463603e-06, "loss": 0.8422, "step": 5438 }, { "epoch": 0.1921152903673026, "grad_norm": 1.9597491025924683, "learning_rate": 9.326643098779945e-06, "loss": 0.8469, "step": 5439 }, { "epoch": 0.1921506121710105, "grad_norm": 2.095378875732422, "learning_rate": 9.326356376297567e-06, "loss": 0.8631, "step": 5440 }, { "epoch": 0.19218593397471842, "grad_norm": 1.642290711402893, "learning_rate": 9.326069597192652e-06, "loss": 0.8534, "step": 5441 }, { "epoch": 0.19222125577842633, "grad_norm": 1.7800551652908325, "learning_rate": 9.32578276146895e-06, "loss": 0.8309, "step": 5442 }, { "epoch": 0.19225657758213424, "grad_norm": 2.2529079914093018, "learning_rate": 9.32549586913022e-06, "loss": 0.8462, "step": 5443 }, { "epoch": 0.19229189938584212, "grad_norm": 1.8151508569717407, "learning_rate": 9.325208920180209e-06, "loss": 0.84, "step": 5444 }, { "epoch": 0.19232722118955003, "grad_norm": 1.749343752861023, "learning_rate": 9.324921914622678e-06, "loss": 0.8891, "step": 5445 }, { "epoch": 0.19236254299325795, "grad_norm": 1.8629385232925415, "learning_rate": 9.32463485246138e-06, "loss": 0.8406, "step": 5446 }, { "epoch": 0.19239786479696586, "grad_norm": 1.6924870014190674, "learning_rate": 9.324347733700076e-06, "loss": 0.8365, "step": 5447 }, { "epoch": 0.19243318660067377, "grad_norm": 1.9355016946792603, "learning_rate": 9.324060558342523e-06, "loss": 0.8125, "step": 5448 }, { "epoch": 0.19246850840438168, "grad_norm": 1.6712403297424316, "learning_rate": 9.323773326392477e-06, "loss": 0.8341, "step": 5449 }, { "epoch": 0.1925038302080896, "grad_norm": 1.5411241054534912, "learning_rate": 9.323486037853697e-06, "loss": 0.8088, "step": 5450 }, { "epoch": 0.19253915201179747, "grad_norm": 1.8180909156799316, "learning_rate": 9.323198692729947e-06, "loss": 0.8364, "step": 5451 }, { "epoch": 0.19257447381550538, "grad_norm": 1.676535964012146, "learning_rate": 9.322911291024985e-06, "loss": 0.856, "step": 5452 }, { "epoch": 0.1926097956192133, "grad_norm": 1.8286670446395874, "learning_rate": 9.32262383274257e-06, "loss": 0.8944, "step": 5453 }, { "epoch": 0.1926451174229212, "grad_norm": 2.0098838806152344, "learning_rate": 9.322336317886468e-06, "loss": 0.8783, "step": 5454 }, { "epoch": 0.1926804392266291, "grad_norm": 1.7624229192733765, "learning_rate": 9.322048746460442e-06, "loss": 0.8697, "step": 5455 }, { "epoch": 0.19271576103033702, "grad_norm": 1.7797191143035889, "learning_rate": 9.321761118468255e-06, "loss": 0.8652, "step": 5456 }, { "epoch": 0.1927510828340449, "grad_norm": 2.0050466060638428, "learning_rate": 9.321473433913669e-06, "loss": 0.8412, "step": 5457 }, { "epoch": 0.19278640463775282, "grad_norm": 1.9165208339691162, "learning_rate": 9.321185692800452e-06, "loss": 0.8465, "step": 5458 }, { "epoch": 0.19282172644146073, "grad_norm": 1.7967524528503418, "learning_rate": 9.320897895132368e-06, "loss": 0.8654, "step": 5459 }, { "epoch": 0.19285704824516864, "grad_norm": 1.669969081878662, "learning_rate": 9.320610040913186e-06, "loss": 0.8706, "step": 5460 }, { "epoch": 0.19289237004887655, "grad_norm": 1.8102498054504395, "learning_rate": 9.320322130146671e-06, "loss": 0.8229, "step": 5461 }, { "epoch": 0.19292769185258446, "grad_norm": 1.6802833080291748, "learning_rate": 9.320034162836594e-06, "loss": 0.8486, "step": 5462 }, { "epoch": 0.19296301365629237, "grad_norm": 1.5994993448257446, "learning_rate": 9.319746138986718e-06, "loss": 0.879, "step": 5463 }, { "epoch": 0.19299833546000025, "grad_norm": 1.631670594215393, "learning_rate": 9.319458058600819e-06, "loss": 0.8395, "step": 5464 }, { "epoch": 0.19303365726370816, "grad_norm": 1.5466562509536743, "learning_rate": 9.319169921682665e-06, "loss": 0.8825, "step": 5465 }, { "epoch": 0.19306897906741607, "grad_norm": 1.7526016235351562, "learning_rate": 9.318881728236027e-06, "loss": 0.8673, "step": 5466 }, { "epoch": 0.19310430087112399, "grad_norm": 1.508365273475647, "learning_rate": 9.318593478264677e-06, "loss": 0.841, "step": 5467 }, { "epoch": 0.1931396226748319, "grad_norm": 1.5878974199295044, "learning_rate": 9.318305171772385e-06, "loss": 0.8438, "step": 5468 }, { "epoch": 0.1931749444785398, "grad_norm": 1.664609670639038, "learning_rate": 9.31801680876293e-06, "loss": 0.8583, "step": 5469 }, { "epoch": 0.19321026628224772, "grad_norm": 1.6203911304473877, "learning_rate": 9.317728389240081e-06, "loss": 0.876, "step": 5470 }, { "epoch": 0.1932455880859556, "grad_norm": 1.6146080493927002, "learning_rate": 9.317439913207615e-06, "loss": 0.8754, "step": 5471 }, { "epoch": 0.1932809098896635, "grad_norm": 1.6736493110656738, "learning_rate": 9.317151380669307e-06, "loss": 0.8602, "step": 5472 }, { "epoch": 0.19331623169337142, "grad_norm": 1.7198764085769653, "learning_rate": 9.316862791628934e-06, "loss": 0.8661, "step": 5473 }, { "epoch": 0.19335155349707933, "grad_norm": 1.586081862449646, "learning_rate": 9.31657414609027e-06, "loss": 0.8576, "step": 5474 }, { "epoch": 0.19338687530078724, "grad_norm": 1.923326015472412, "learning_rate": 9.316285444057095e-06, "loss": 0.8543, "step": 5475 }, { "epoch": 0.19342219710449515, "grad_norm": 1.9188988208770752, "learning_rate": 9.315996685533191e-06, "loss": 0.8829, "step": 5476 }, { "epoch": 0.19345751890820304, "grad_norm": 1.8771183490753174, "learning_rate": 9.31570787052233e-06, "loss": 0.8494, "step": 5477 }, { "epoch": 0.19349284071191095, "grad_norm": 1.6240251064300537, "learning_rate": 9.315418999028297e-06, "loss": 0.8559, "step": 5478 }, { "epoch": 0.19352816251561886, "grad_norm": 1.6050360202789307, "learning_rate": 9.315130071054872e-06, "loss": 0.8391, "step": 5479 }, { "epoch": 0.19356348431932677, "grad_norm": 1.8108662366867065, "learning_rate": 9.314841086605836e-06, "loss": 0.8268, "step": 5480 }, { "epoch": 0.19359880612303468, "grad_norm": 1.7039259672164917, "learning_rate": 9.31455204568497e-06, "loss": 0.855, "step": 5481 }, { "epoch": 0.1936341279267426, "grad_norm": 1.8324311971664429, "learning_rate": 9.314262948296057e-06, "loss": 0.8393, "step": 5482 }, { "epoch": 0.1936694497304505, "grad_norm": 1.8357948064804077, "learning_rate": 9.313973794442883e-06, "loss": 0.8506, "step": 5483 }, { "epoch": 0.19370477153415838, "grad_norm": 1.0977811813354492, "learning_rate": 9.31368458412923e-06, "loss": 0.6001, "step": 5484 }, { "epoch": 0.1937400933378663, "grad_norm": 1.7483168840408325, "learning_rate": 9.313395317358886e-06, "loss": 0.8339, "step": 5485 }, { "epoch": 0.1937754151415742, "grad_norm": 1.9011131525039673, "learning_rate": 9.313105994135632e-06, "loss": 0.866, "step": 5486 }, { "epoch": 0.1938107369452821, "grad_norm": 1.6270389556884766, "learning_rate": 9.31281661446326e-06, "loss": 0.8795, "step": 5487 }, { "epoch": 0.19384605874899002, "grad_norm": 1.7963426113128662, "learning_rate": 9.312527178345553e-06, "loss": 0.8479, "step": 5488 }, { "epoch": 0.19388138055269794, "grad_norm": 1.7854751348495483, "learning_rate": 9.312237685786302e-06, "loss": 0.8525, "step": 5489 }, { "epoch": 0.19391670235640582, "grad_norm": 1.9250725507736206, "learning_rate": 9.311948136789295e-06, "loss": 0.8515, "step": 5490 }, { "epoch": 0.19395202416011373, "grad_norm": 1.5616061687469482, "learning_rate": 9.311658531358321e-06, "loss": 0.833, "step": 5491 }, { "epoch": 0.19398734596382164, "grad_norm": 1.7496215105056763, "learning_rate": 9.31136886949717e-06, "loss": 0.832, "step": 5492 }, { "epoch": 0.19402266776752955, "grad_norm": 1.7352262735366821, "learning_rate": 9.311079151209634e-06, "loss": 0.8475, "step": 5493 }, { "epoch": 0.19405798957123746, "grad_norm": 1.622144341468811, "learning_rate": 9.310789376499505e-06, "loss": 0.8811, "step": 5494 }, { "epoch": 0.19409331137494537, "grad_norm": 1.7260907888412476, "learning_rate": 9.310499545370575e-06, "loss": 0.869, "step": 5495 }, { "epoch": 0.19412863317865328, "grad_norm": 1.572668433189392, "learning_rate": 9.310209657826636e-06, "loss": 0.8456, "step": 5496 }, { "epoch": 0.19416395498236116, "grad_norm": 1.8253718614578247, "learning_rate": 9.309919713871486e-06, "loss": 0.8993, "step": 5497 }, { "epoch": 0.19419927678606907, "grad_norm": 2.025808334350586, "learning_rate": 9.309629713508914e-06, "loss": 0.8734, "step": 5498 }, { "epoch": 0.19423459858977699, "grad_norm": 1.7869983911514282, "learning_rate": 9.309339656742719e-06, "loss": 0.8754, "step": 5499 }, { "epoch": 0.1942699203934849, "grad_norm": 1.7343381643295288, "learning_rate": 9.309049543576698e-06, "loss": 0.8709, "step": 5500 }, { "epoch": 0.1943052421971928, "grad_norm": 1.7284339666366577, "learning_rate": 9.308759374014646e-06, "loss": 0.87, "step": 5501 }, { "epoch": 0.19434056400090072, "grad_norm": 1.594658613204956, "learning_rate": 9.308469148060362e-06, "loss": 0.8541, "step": 5502 }, { "epoch": 0.1943758858046086, "grad_norm": 1.6503533124923706, "learning_rate": 9.308178865717644e-06, "loss": 0.8648, "step": 5503 }, { "epoch": 0.1944112076083165, "grad_norm": 1.5915197134017944, "learning_rate": 9.307888526990288e-06, "loss": 0.8505, "step": 5504 }, { "epoch": 0.19444652941202442, "grad_norm": 1.7112284898757935, "learning_rate": 9.307598131882098e-06, "loss": 0.8652, "step": 5505 }, { "epoch": 0.19448185121573233, "grad_norm": 1.712674856185913, "learning_rate": 9.307307680396874e-06, "loss": 0.8909, "step": 5506 }, { "epoch": 0.19451717301944024, "grad_norm": 1.7776380777359009, "learning_rate": 9.307017172538417e-06, "loss": 0.8501, "step": 5507 }, { "epoch": 0.19455249482314815, "grad_norm": 1.5642651319503784, "learning_rate": 9.306726608310528e-06, "loss": 0.8234, "step": 5508 }, { "epoch": 0.19458781662685606, "grad_norm": 1.747894287109375, "learning_rate": 9.30643598771701e-06, "loss": 0.8666, "step": 5509 }, { "epoch": 0.19462313843056395, "grad_norm": 1.8002487421035767, "learning_rate": 9.306145310761669e-06, "loss": 0.8435, "step": 5510 }, { "epoch": 0.19465846023427186, "grad_norm": 1.816051959991455, "learning_rate": 9.305854577448307e-06, "loss": 0.8784, "step": 5511 }, { "epoch": 0.19469378203797977, "grad_norm": 2.086076259613037, "learning_rate": 9.305563787780731e-06, "loss": 0.8656, "step": 5512 }, { "epoch": 0.19472910384168768, "grad_norm": 1.7004159688949585, "learning_rate": 9.305272941762744e-06, "loss": 0.8586, "step": 5513 }, { "epoch": 0.1947644256453956, "grad_norm": 1.731619954109192, "learning_rate": 9.304982039398154e-06, "loss": 0.8755, "step": 5514 }, { "epoch": 0.1947997474491035, "grad_norm": 1.930248737335205, "learning_rate": 9.304691080690769e-06, "loss": 0.8635, "step": 5515 }, { "epoch": 0.19483506925281138, "grad_norm": 2.0016417503356934, "learning_rate": 9.304400065644396e-06, "loss": 0.8384, "step": 5516 }, { "epoch": 0.1948703910565193, "grad_norm": 1.8382573127746582, "learning_rate": 9.304108994262843e-06, "loss": 0.8611, "step": 5517 }, { "epoch": 0.1949057128602272, "grad_norm": 1.7653703689575195, "learning_rate": 9.303817866549922e-06, "loss": 0.8524, "step": 5518 }, { "epoch": 0.19494103466393511, "grad_norm": 1.7488412857055664, "learning_rate": 9.303526682509443e-06, "loss": 0.8464, "step": 5519 }, { "epoch": 0.19497635646764302, "grad_norm": 3.5741798877716064, "learning_rate": 9.303235442145213e-06, "loss": 0.8346, "step": 5520 }, { "epoch": 0.19501167827135094, "grad_norm": 1.7369792461395264, "learning_rate": 9.302944145461048e-06, "loss": 0.8276, "step": 5521 }, { "epoch": 0.19504700007505885, "grad_norm": 1.7719258069992065, "learning_rate": 9.302652792460758e-06, "loss": 0.8676, "step": 5522 }, { "epoch": 0.19508232187876673, "grad_norm": 1.6545299291610718, "learning_rate": 9.302361383148158e-06, "loss": 0.8763, "step": 5523 }, { "epoch": 0.19511764368247464, "grad_norm": 1.9254122972488403, "learning_rate": 9.302069917527061e-06, "loss": 0.8355, "step": 5524 }, { "epoch": 0.19515296548618255, "grad_norm": 2.1767115592956543, "learning_rate": 9.301778395601281e-06, "loss": 0.8231, "step": 5525 }, { "epoch": 0.19518828728989046, "grad_norm": 1.8792920112609863, "learning_rate": 9.301486817374635e-06, "loss": 0.8734, "step": 5526 }, { "epoch": 0.19522360909359837, "grad_norm": 1.7122982740402222, "learning_rate": 9.301195182850937e-06, "loss": 0.8421, "step": 5527 }, { "epoch": 0.19525893089730628, "grad_norm": 1.8153445720672607, "learning_rate": 9.300903492034005e-06, "loss": 0.8504, "step": 5528 }, { "epoch": 0.19529425270101416, "grad_norm": 1.909383773803711, "learning_rate": 9.300611744927658e-06, "loss": 0.8906, "step": 5529 }, { "epoch": 0.19532957450472208, "grad_norm": 1.7096147537231445, "learning_rate": 9.30031994153571e-06, "loss": 0.8235, "step": 5530 }, { "epoch": 0.19536489630842999, "grad_norm": 1.6567519903182983, "learning_rate": 9.300028081861984e-06, "loss": 0.8841, "step": 5531 }, { "epoch": 0.1954002181121379, "grad_norm": 1.8410899639129639, "learning_rate": 9.2997361659103e-06, "loss": 0.8527, "step": 5532 }, { "epoch": 0.1954355399158458, "grad_norm": 1.8093184232711792, "learning_rate": 9.299444193684476e-06, "loss": 0.8393, "step": 5533 }, { "epoch": 0.19547086171955372, "grad_norm": 2.0263376235961914, "learning_rate": 9.299152165188336e-06, "loss": 0.861, "step": 5534 }, { "epoch": 0.19550618352326163, "grad_norm": 1.9260720014572144, "learning_rate": 9.298860080425701e-06, "loss": 0.8623, "step": 5535 }, { "epoch": 0.1955415053269695, "grad_norm": 1.8079100847244263, "learning_rate": 9.298567939400392e-06, "loss": 0.8417, "step": 5536 }, { "epoch": 0.19557682713067742, "grad_norm": 2.137331485748291, "learning_rate": 9.298275742116233e-06, "loss": 0.867, "step": 5537 }, { "epoch": 0.19561214893438533, "grad_norm": 1.7263847589492798, "learning_rate": 9.297983488577052e-06, "loss": 0.8724, "step": 5538 }, { "epoch": 0.19564747073809324, "grad_norm": 2.0993969440460205, "learning_rate": 9.297691178786669e-06, "loss": 0.849, "step": 5539 }, { "epoch": 0.19568279254180115, "grad_norm": 2.0248091220855713, "learning_rate": 9.297398812748912e-06, "loss": 0.8519, "step": 5540 }, { "epoch": 0.19571811434550906, "grad_norm": 1.8099161386489868, "learning_rate": 9.297106390467607e-06, "loss": 0.8845, "step": 5541 }, { "epoch": 0.19575343614921695, "grad_norm": 1.7917027473449707, "learning_rate": 9.296813911946582e-06, "loss": 0.8481, "step": 5542 }, { "epoch": 0.19578875795292486, "grad_norm": 1.844480037689209, "learning_rate": 9.296521377189664e-06, "loss": 0.8747, "step": 5543 }, { "epoch": 0.19582407975663277, "grad_norm": 1.8589826822280884, "learning_rate": 9.296228786200683e-06, "loss": 0.8435, "step": 5544 }, { "epoch": 0.19585940156034068, "grad_norm": 2.0109403133392334, "learning_rate": 9.295936138983465e-06, "loss": 0.8378, "step": 5545 }, { "epoch": 0.1958947233640486, "grad_norm": 1.827653169631958, "learning_rate": 9.295643435541843e-06, "loss": 0.8709, "step": 5546 }, { "epoch": 0.1959300451677565, "grad_norm": 1.6960334777832031, "learning_rate": 9.295350675879648e-06, "loss": 0.925, "step": 5547 }, { "epoch": 0.1959653669714644, "grad_norm": 1.7086964845657349, "learning_rate": 9.295057860000711e-06, "loss": 0.8221, "step": 5548 }, { "epoch": 0.1960006887751723, "grad_norm": 7.445106506347656, "learning_rate": 9.294764987908863e-06, "loss": 0.8773, "step": 5549 }, { "epoch": 0.1960360105788802, "grad_norm": 1.8581434488296509, "learning_rate": 9.294472059607937e-06, "loss": 0.8706, "step": 5550 }, { "epoch": 0.19607133238258811, "grad_norm": 1.7432055473327637, "learning_rate": 9.29417907510177e-06, "loss": 0.8374, "step": 5551 }, { "epoch": 0.19610665418629603, "grad_norm": 1.6999166011810303, "learning_rate": 9.293886034394196e-06, "loss": 0.8216, "step": 5552 }, { "epoch": 0.19614197599000394, "grad_norm": 1.8980830907821655, "learning_rate": 9.293592937489046e-06, "loss": 0.8381, "step": 5553 }, { "epoch": 0.19617729779371185, "grad_norm": 1.0414855480194092, "learning_rate": 9.29329978439016e-06, "loss": 0.6, "step": 5554 }, { "epoch": 0.19621261959741973, "grad_norm": 1.9027236700057983, "learning_rate": 9.293006575101373e-06, "loss": 0.835, "step": 5555 }, { "epoch": 0.19624794140112764, "grad_norm": 1.7507673501968384, "learning_rate": 9.292713309626521e-06, "loss": 0.8154, "step": 5556 }, { "epoch": 0.19628326320483555, "grad_norm": 1.6812399625778198, "learning_rate": 9.292419987969448e-06, "loss": 0.8821, "step": 5557 }, { "epoch": 0.19631858500854346, "grad_norm": 1.5296971797943115, "learning_rate": 9.292126610133986e-06, "loss": 0.8184, "step": 5558 }, { "epoch": 0.19635390681225137, "grad_norm": 1.6460458040237427, "learning_rate": 9.291833176123979e-06, "loss": 0.8946, "step": 5559 }, { "epoch": 0.19638922861595928, "grad_norm": 1.538291096687317, "learning_rate": 9.291539685943267e-06, "loss": 0.8656, "step": 5560 }, { "epoch": 0.1964245504196672, "grad_norm": 2.1323070526123047, "learning_rate": 9.29124613959569e-06, "loss": 0.8514, "step": 5561 }, { "epoch": 0.19645987222337508, "grad_norm": 1.9466625452041626, "learning_rate": 9.290952537085088e-06, "loss": 0.8509, "step": 5562 }, { "epoch": 0.196495194027083, "grad_norm": 1.7006266117095947, "learning_rate": 9.290658878415309e-06, "loss": 0.8787, "step": 5563 }, { "epoch": 0.1965305158307909, "grad_norm": 1.6729010343551636, "learning_rate": 9.290365163590191e-06, "loss": 0.8627, "step": 5564 }, { "epoch": 0.1965658376344988, "grad_norm": 1.8834092617034912, "learning_rate": 9.29007139261358e-06, "loss": 0.855, "step": 5565 }, { "epoch": 0.19660115943820672, "grad_norm": 2.256056308746338, "learning_rate": 9.289777565489322e-06, "loss": 0.8497, "step": 5566 }, { "epoch": 0.19663648124191463, "grad_norm": 2.026407241821289, "learning_rate": 9.289483682221262e-06, "loss": 0.8639, "step": 5567 }, { "epoch": 0.1966718030456225, "grad_norm": 1.8107295036315918, "learning_rate": 9.289189742813245e-06, "loss": 0.8528, "step": 5568 }, { "epoch": 0.19670712484933042, "grad_norm": 1.8859601020812988, "learning_rate": 9.28889574726912e-06, "loss": 0.874, "step": 5569 }, { "epoch": 0.19674244665303833, "grad_norm": 1.9425172805786133, "learning_rate": 9.288601695592732e-06, "loss": 0.8595, "step": 5570 }, { "epoch": 0.19677776845674624, "grad_norm": 1.909919261932373, "learning_rate": 9.288307587787932e-06, "loss": 0.8769, "step": 5571 }, { "epoch": 0.19681309026045415, "grad_norm": 1.7428181171417236, "learning_rate": 9.28801342385857e-06, "loss": 0.8501, "step": 5572 }, { "epoch": 0.19684841206416206, "grad_norm": 1.5488909482955933, "learning_rate": 9.287719203808492e-06, "loss": 0.7976, "step": 5573 }, { "epoch": 0.19688373386786998, "grad_norm": 2.054861545562744, "learning_rate": 9.287424927641552e-06, "loss": 0.8675, "step": 5574 }, { "epoch": 0.19691905567157786, "grad_norm": 1.6576387882232666, "learning_rate": 9.2871305953616e-06, "loss": 0.853, "step": 5575 }, { "epoch": 0.19695437747528577, "grad_norm": 1.8574563264846802, "learning_rate": 9.28683620697249e-06, "loss": 0.8594, "step": 5576 }, { "epoch": 0.19698969927899368, "grad_norm": 1.7121539115905762, "learning_rate": 9.286541762478072e-06, "loss": 0.8441, "step": 5577 }, { "epoch": 0.1970250210827016, "grad_norm": 1.8818432092666626, "learning_rate": 9.286247261882202e-06, "loss": 0.8398, "step": 5578 }, { "epoch": 0.1970603428864095, "grad_norm": 1.6238269805908203, "learning_rate": 9.285952705188735e-06, "loss": 0.8275, "step": 5579 }, { "epoch": 0.1970956646901174, "grad_norm": 1.8627161979675293, "learning_rate": 9.285658092401521e-06, "loss": 0.8794, "step": 5580 }, { "epoch": 0.1971309864938253, "grad_norm": 1.7422034740447998, "learning_rate": 9.285363423524424e-06, "loss": 0.8138, "step": 5581 }, { "epoch": 0.1971663082975332, "grad_norm": 1.9542180299758911, "learning_rate": 9.285068698561293e-06, "loss": 0.8963, "step": 5582 }, { "epoch": 0.19720163010124112, "grad_norm": 1.655910611152649, "learning_rate": 9.284773917515988e-06, "loss": 0.8219, "step": 5583 }, { "epoch": 0.19723695190494903, "grad_norm": 1.787674069404602, "learning_rate": 9.28447908039237e-06, "loss": 0.8634, "step": 5584 }, { "epoch": 0.19727227370865694, "grad_norm": 1.8409258127212524, "learning_rate": 9.284184187194293e-06, "loss": 0.8963, "step": 5585 }, { "epoch": 0.19730759551236485, "grad_norm": 1.6622487306594849, "learning_rate": 9.283889237925618e-06, "loss": 0.8552, "step": 5586 }, { "epoch": 0.19734291731607276, "grad_norm": 1.6655670404434204, "learning_rate": 9.283594232590208e-06, "loss": 0.8731, "step": 5587 }, { "epoch": 0.19737823911978064, "grad_norm": 1.6652642488479614, "learning_rate": 9.283299171191922e-06, "loss": 0.8625, "step": 5588 }, { "epoch": 0.19741356092348855, "grad_norm": 1.7954766750335693, "learning_rate": 9.283004053734622e-06, "loss": 0.8781, "step": 5589 }, { "epoch": 0.19744888272719646, "grad_norm": 1.9681825637817383, "learning_rate": 9.282708880222168e-06, "loss": 0.8666, "step": 5590 }, { "epoch": 0.19748420453090437, "grad_norm": 1.6755924224853516, "learning_rate": 9.282413650658426e-06, "loss": 0.8488, "step": 5591 }, { "epoch": 0.19751952633461228, "grad_norm": 1.9086008071899414, "learning_rate": 9.28211836504726e-06, "loss": 0.8613, "step": 5592 }, { "epoch": 0.1975548481383202, "grad_norm": 1.6907557249069214, "learning_rate": 9.281823023392532e-06, "loss": 0.8715, "step": 5593 }, { "epoch": 0.19759016994202808, "grad_norm": 1.7685587406158447, "learning_rate": 9.28152762569811e-06, "loss": 0.8357, "step": 5594 }, { "epoch": 0.197625491745736, "grad_norm": 1.7962071895599365, "learning_rate": 9.28123217196786e-06, "loss": 0.8517, "step": 5595 }, { "epoch": 0.1976608135494439, "grad_norm": 1.6279674768447876, "learning_rate": 9.280936662205647e-06, "loss": 0.8504, "step": 5596 }, { "epoch": 0.1976961353531518, "grad_norm": 1.6323403120040894, "learning_rate": 9.280641096415342e-06, "loss": 0.8465, "step": 5597 }, { "epoch": 0.19773145715685972, "grad_norm": 1.7467995882034302, "learning_rate": 9.280345474600808e-06, "loss": 0.8288, "step": 5598 }, { "epoch": 0.19776677896056763, "grad_norm": 1.629370093345642, "learning_rate": 9.28004979676592e-06, "loss": 0.8337, "step": 5599 }, { "epoch": 0.19780210076427554, "grad_norm": 1.6652438640594482, "learning_rate": 9.279754062914542e-06, "loss": 0.8599, "step": 5600 }, { "epoch": 0.19783742256798342, "grad_norm": 1.7893359661102295, "learning_rate": 9.279458273050547e-06, "loss": 0.8379, "step": 5601 }, { "epoch": 0.19787274437169133, "grad_norm": 1.6609479188919067, "learning_rate": 9.279162427177811e-06, "loss": 0.8253, "step": 5602 }, { "epoch": 0.19790806617539924, "grad_norm": 2.0774497985839844, "learning_rate": 9.278866525300197e-06, "loss": 0.8084, "step": 5603 }, { "epoch": 0.19794338797910715, "grad_norm": 1.6277087926864624, "learning_rate": 9.278570567421584e-06, "loss": 0.8576, "step": 5604 }, { "epoch": 0.19797870978281507, "grad_norm": 1.7244548797607422, "learning_rate": 9.278274553545843e-06, "loss": 0.8361, "step": 5605 }, { "epoch": 0.19801403158652298, "grad_norm": 1.784799337387085, "learning_rate": 9.277978483676851e-06, "loss": 0.9262, "step": 5606 }, { "epoch": 0.19804935339023086, "grad_norm": 1.7078173160552979, "learning_rate": 9.277682357818479e-06, "loss": 0.8661, "step": 5607 }, { "epoch": 0.19808467519393877, "grad_norm": 1.6774938106536865, "learning_rate": 9.277386175974604e-06, "loss": 0.8513, "step": 5608 }, { "epoch": 0.19811999699764668, "grad_norm": 7.707015514373779, "learning_rate": 9.277089938149104e-06, "loss": 0.8382, "step": 5609 }, { "epoch": 0.1981553188013546, "grad_norm": 2.1762750148773193, "learning_rate": 9.276793644345854e-06, "loss": 0.8678, "step": 5610 }, { "epoch": 0.1981906406050625, "grad_norm": 2.019041061401367, "learning_rate": 9.276497294568733e-06, "loss": 0.8869, "step": 5611 }, { "epoch": 0.1982259624087704, "grad_norm": 1.7396560907363892, "learning_rate": 9.276200888821619e-06, "loss": 0.8983, "step": 5612 }, { "epoch": 0.19826128421247832, "grad_norm": 1.803279161453247, "learning_rate": 9.275904427108393e-06, "loss": 0.8477, "step": 5613 }, { "epoch": 0.1982966060161862, "grad_norm": 1.856323480606079, "learning_rate": 9.275607909432933e-06, "loss": 0.8616, "step": 5614 }, { "epoch": 0.19833192781989412, "grad_norm": 1.826521873474121, "learning_rate": 9.27531133579912e-06, "loss": 0.8646, "step": 5615 }, { "epoch": 0.19836724962360203, "grad_norm": 3.3322842121124268, "learning_rate": 9.275014706210835e-06, "loss": 0.8468, "step": 5616 }, { "epoch": 0.19840257142730994, "grad_norm": 1.688104271888733, "learning_rate": 9.274718020671961e-06, "loss": 0.813, "step": 5617 }, { "epoch": 0.19843789323101785, "grad_norm": 1.660125494003296, "learning_rate": 9.274421279186384e-06, "loss": 0.8925, "step": 5618 }, { "epoch": 0.19847321503472576, "grad_norm": 1.4851665496826172, "learning_rate": 9.274124481757982e-06, "loss": 0.852, "step": 5619 }, { "epoch": 0.19850853683843364, "grad_norm": 1.6189652681350708, "learning_rate": 9.273827628390643e-06, "loss": 0.8324, "step": 5620 }, { "epoch": 0.19854385864214155, "grad_norm": 1.7394602298736572, "learning_rate": 9.273530719088254e-06, "loss": 0.8534, "step": 5621 }, { "epoch": 0.19857918044584946, "grad_norm": 1.7625949382781982, "learning_rate": 9.273233753854695e-06, "loss": 0.8353, "step": 5622 }, { "epoch": 0.19861450224955737, "grad_norm": 1.7278761863708496, "learning_rate": 9.272936732693856e-06, "loss": 0.8464, "step": 5623 }, { "epoch": 0.19864982405326528, "grad_norm": 1.6085659265518188, "learning_rate": 9.272639655609625e-06, "loss": 0.8376, "step": 5624 }, { "epoch": 0.1986851458569732, "grad_norm": 1.8365917205810547, "learning_rate": 9.27234252260589e-06, "loss": 0.8665, "step": 5625 }, { "epoch": 0.1987204676606811, "grad_norm": 1.6968718767166138, "learning_rate": 9.27204533368654e-06, "loss": 0.8595, "step": 5626 }, { "epoch": 0.198755789464389, "grad_norm": 1.6950764656066895, "learning_rate": 9.271748088855461e-06, "loss": 0.8488, "step": 5627 }, { "epoch": 0.1987911112680969, "grad_norm": 1.6182771921157837, "learning_rate": 9.271450788116547e-06, "loss": 0.8901, "step": 5628 }, { "epoch": 0.1988264330718048, "grad_norm": 1.6748814582824707, "learning_rate": 9.27115343147369e-06, "loss": 0.868, "step": 5629 }, { "epoch": 0.19886175487551272, "grad_norm": 1.712728500366211, "learning_rate": 9.270856018930776e-06, "loss": 0.8318, "step": 5630 }, { "epoch": 0.19889707667922063, "grad_norm": 1.794195294380188, "learning_rate": 9.270558550491704e-06, "loss": 0.8709, "step": 5631 }, { "epoch": 0.19893239848292854, "grad_norm": 1.7289503812789917, "learning_rate": 9.270261026160364e-06, "loss": 0.8562, "step": 5632 }, { "epoch": 0.19896772028663642, "grad_norm": 1.5974137783050537, "learning_rate": 9.269963445940651e-06, "loss": 0.8709, "step": 5633 }, { "epoch": 0.19900304209034433, "grad_norm": 1.6475096940994263, "learning_rate": 9.269665809836457e-06, "loss": 0.8983, "step": 5634 }, { "epoch": 0.19903836389405224, "grad_norm": 1.6749995946884155, "learning_rate": 9.269368117851682e-06, "loss": 0.8651, "step": 5635 }, { "epoch": 0.19907368569776016, "grad_norm": 1.6606305837631226, "learning_rate": 9.269070369990219e-06, "loss": 0.8473, "step": 5636 }, { "epoch": 0.19910900750146807, "grad_norm": 1.6020069122314453, "learning_rate": 9.268772566255964e-06, "loss": 0.8234, "step": 5637 }, { "epoch": 0.19914432930517598, "grad_norm": 1.5405948162078857, "learning_rate": 9.268474706652817e-06, "loss": 0.8285, "step": 5638 }, { "epoch": 0.1991796511088839, "grad_norm": 1.645013689994812, "learning_rate": 9.268176791184675e-06, "loss": 0.8472, "step": 5639 }, { "epoch": 0.19921497291259177, "grad_norm": 1.632251501083374, "learning_rate": 9.267878819855439e-06, "loss": 0.8325, "step": 5640 }, { "epoch": 0.19925029471629968, "grad_norm": 1.680391788482666, "learning_rate": 9.267580792669007e-06, "loss": 0.8772, "step": 5641 }, { "epoch": 0.1992856165200076, "grad_norm": 1.7257390022277832, "learning_rate": 9.267282709629278e-06, "loss": 0.8632, "step": 5642 }, { "epoch": 0.1993209383237155, "grad_norm": 1.821245789527893, "learning_rate": 9.266984570740157e-06, "loss": 0.8415, "step": 5643 }, { "epoch": 0.1993562601274234, "grad_norm": 2.4837586879730225, "learning_rate": 9.266686376005542e-06, "loss": 0.8578, "step": 5644 }, { "epoch": 0.19939158193113132, "grad_norm": 1.7711448669433594, "learning_rate": 9.266388125429339e-06, "loss": 0.8318, "step": 5645 }, { "epoch": 0.1994269037348392, "grad_norm": 8.895468711853027, "learning_rate": 9.266089819015451e-06, "loss": 0.8817, "step": 5646 }, { "epoch": 0.19946222553854712, "grad_norm": 1.8117984533309937, "learning_rate": 9.265791456767781e-06, "loss": 0.8286, "step": 5647 }, { "epoch": 0.19949754734225503, "grad_norm": 1.635018229484558, "learning_rate": 9.265493038690233e-06, "loss": 0.8876, "step": 5648 }, { "epoch": 0.19953286914596294, "grad_norm": 1.681422472000122, "learning_rate": 9.265194564786715e-06, "loss": 0.8557, "step": 5649 }, { "epoch": 0.19956819094967085, "grad_norm": 1.6349796056747437, "learning_rate": 9.264896035061132e-06, "loss": 0.8214, "step": 5650 }, { "epoch": 0.19960351275337876, "grad_norm": 1.9951756000518799, "learning_rate": 9.264597449517391e-06, "loss": 0.8557, "step": 5651 }, { "epoch": 0.19963883455708667, "grad_norm": 1.9611048698425293, "learning_rate": 9.264298808159402e-06, "loss": 0.8327, "step": 5652 }, { "epoch": 0.19967415636079455, "grad_norm": 1.6981709003448486, "learning_rate": 9.264000110991069e-06, "loss": 0.8423, "step": 5653 }, { "epoch": 0.19970947816450246, "grad_norm": 1.7365829944610596, "learning_rate": 9.263701358016305e-06, "loss": 0.9012, "step": 5654 }, { "epoch": 0.19974479996821037, "grad_norm": 1.7198127508163452, "learning_rate": 9.263402549239022e-06, "loss": 0.8522, "step": 5655 }, { "epoch": 0.19978012177191828, "grad_norm": 1.8185139894485474, "learning_rate": 9.263103684663126e-06, "loss": 0.8347, "step": 5656 }, { "epoch": 0.1998154435756262, "grad_norm": 1.6479542255401611, "learning_rate": 9.26280476429253e-06, "loss": 0.8158, "step": 5657 }, { "epoch": 0.1998507653793341, "grad_norm": 1.7318483591079712, "learning_rate": 9.262505788131145e-06, "loss": 0.85, "step": 5658 }, { "epoch": 0.199886087183042, "grad_norm": 1.8158071041107178, "learning_rate": 9.262206756182888e-06, "loss": 0.8622, "step": 5659 }, { "epoch": 0.1999214089867499, "grad_norm": 1.7257344722747803, "learning_rate": 9.261907668451669e-06, "loss": 0.8786, "step": 5660 }, { "epoch": 0.1999567307904578, "grad_norm": 1.5706686973571777, "learning_rate": 9.261608524941403e-06, "loss": 0.8793, "step": 5661 }, { "epoch": 0.19999205259416572, "grad_norm": 1.6246229410171509, "learning_rate": 9.261309325656008e-06, "loss": 0.8476, "step": 5662 }, { "epoch": 0.20002737439787363, "grad_norm": 1.5633167028427124, "learning_rate": 9.261010070599394e-06, "loss": 0.8358, "step": 5663 }, { "epoch": 0.20006269620158154, "grad_norm": 1.626103162765503, "learning_rate": 9.260710759775484e-06, "loss": 0.8353, "step": 5664 }, { "epoch": 0.20009801800528945, "grad_norm": 1.5964059829711914, "learning_rate": 9.260411393188192e-06, "loss": 0.8482, "step": 5665 }, { "epoch": 0.20013333980899733, "grad_norm": 1.598383903503418, "learning_rate": 9.260111970841437e-06, "loss": 0.8609, "step": 5666 }, { "epoch": 0.20016866161270525, "grad_norm": 1.7630345821380615, "learning_rate": 9.259812492739135e-06, "loss": 0.8545, "step": 5667 }, { "epoch": 0.20020398341641316, "grad_norm": 1.6049857139587402, "learning_rate": 9.259512958885211e-06, "loss": 0.8538, "step": 5668 }, { "epoch": 0.20023930522012107, "grad_norm": 1.581155776977539, "learning_rate": 9.259213369283581e-06, "loss": 0.8428, "step": 5669 }, { "epoch": 0.20027462702382898, "grad_norm": 3.204801321029663, "learning_rate": 9.258913723938165e-06, "loss": 0.8047, "step": 5670 }, { "epoch": 0.2003099488275369, "grad_norm": 1.6525897979736328, "learning_rate": 9.25861402285289e-06, "loss": 0.8611, "step": 5671 }, { "epoch": 0.20034527063124477, "grad_norm": 1.6418310403823853, "learning_rate": 9.258314266031674e-06, "loss": 0.84, "step": 5672 }, { "epoch": 0.20038059243495268, "grad_norm": 1.614951491355896, "learning_rate": 9.258014453478441e-06, "loss": 0.8602, "step": 5673 }, { "epoch": 0.2004159142386606, "grad_norm": 1.955043077468872, "learning_rate": 9.257714585197116e-06, "loss": 0.8489, "step": 5674 }, { "epoch": 0.2004512360423685, "grad_norm": 1.773140549659729, "learning_rate": 9.25741466119162e-06, "loss": 0.8309, "step": 5675 }, { "epoch": 0.2004865578460764, "grad_norm": 1.6657848358154297, "learning_rate": 9.257114681465883e-06, "loss": 0.8522, "step": 5676 }, { "epoch": 0.20052187964978432, "grad_norm": 1.7541673183441162, "learning_rate": 9.25681464602383e-06, "loss": 0.864, "step": 5677 }, { "epoch": 0.20055720145349223, "grad_norm": 1.6068446636199951, "learning_rate": 9.256514554869386e-06, "loss": 0.8197, "step": 5678 }, { "epoch": 0.20059252325720012, "grad_norm": 1.746397614479065, "learning_rate": 9.256214408006482e-06, "loss": 0.8438, "step": 5679 }, { "epoch": 0.20062784506090803, "grad_norm": 1.6599867343902588, "learning_rate": 9.255914205439043e-06, "loss": 0.8482, "step": 5680 }, { "epoch": 0.20066316686461594, "grad_norm": 1.856126070022583, "learning_rate": 9.255613947170999e-06, "loss": 0.8627, "step": 5681 }, { "epoch": 0.20069848866832385, "grad_norm": 1.71256685256958, "learning_rate": 9.255313633206278e-06, "loss": 0.8501, "step": 5682 }, { "epoch": 0.20073381047203176, "grad_norm": 1.8356653451919556, "learning_rate": 9.255013263548812e-06, "loss": 0.8305, "step": 5683 }, { "epoch": 0.20076913227573967, "grad_norm": 1.7356746196746826, "learning_rate": 9.254712838202533e-06, "loss": 0.8231, "step": 5684 }, { "epoch": 0.20080445407944758, "grad_norm": 1.6144124269485474, "learning_rate": 9.254412357171373e-06, "loss": 0.8634, "step": 5685 }, { "epoch": 0.20083977588315546, "grad_norm": 1.529748558998108, "learning_rate": 9.254111820459262e-06, "loss": 0.8428, "step": 5686 }, { "epoch": 0.20087509768686337, "grad_norm": 1.7671281099319458, "learning_rate": 9.253811228070138e-06, "loss": 0.8793, "step": 5687 }, { "epoch": 0.20091041949057128, "grad_norm": 1.5700517892837524, "learning_rate": 9.25351058000793e-06, "loss": 0.8403, "step": 5688 }, { "epoch": 0.2009457412942792, "grad_norm": 1.6084860563278198, "learning_rate": 9.253209876276576e-06, "loss": 0.8376, "step": 5689 }, { "epoch": 0.2009810630979871, "grad_norm": 1.5498851537704468, "learning_rate": 9.25290911688001e-06, "loss": 0.8581, "step": 5690 }, { "epoch": 0.20101638490169502, "grad_norm": 1.7249373197555542, "learning_rate": 9.25260830182217e-06, "loss": 0.8539, "step": 5691 }, { "epoch": 0.2010517067054029, "grad_norm": 2.211549997329712, "learning_rate": 9.25230743110699e-06, "loss": 0.813, "step": 5692 }, { "epoch": 0.2010870285091108, "grad_norm": 1.8214764595031738, "learning_rate": 9.25200650473841e-06, "loss": 0.828, "step": 5693 }, { "epoch": 0.20112235031281872, "grad_norm": 1.9948832988739014, "learning_rate": 9.251705522720371e-06, "loss": 0.8249, "step": 5694 }, { "epoch": 0.20115767211652663, "grad_norm": 2.0017547607421875, "learning_rate": 9.251404485056807e-06, "loss": 0.883, "step": 5695 }, { "epoch": 0.20119299392023454, "grad_norm": 1.7344673871994019, "learning_rate": 9.25110339175166e-06, "loss": 0.8495, "step": 5696 }, { "epoch": 0.20122831572394245, "grad_norm": 1.7418115139007568, "learning_rate": 9.250802242808872e-06, "loss": 0.8647, "step": 5697 }, { "epoch": 0.20126363752765036, "grad_norm": 1.7228413820266724, "learning_rate": 9.250501038232384e-06, "loss": 0.8652, "step": 5698 }, { "epoch": 0.20129895933135825, "grad_norm": 1.6246671676635742, "learning_rate": 9.250199778026136e-06, "loss": 0.8578, "step": 5699 }, { "epoch": 0.20133428113506616, "grad_norm": 1.700922966003418, "learning_rate": 9.249898462194074e-06, "loss": 0.8627, "step": 5700 }, { "epoch": 0.20136960293877407, "grad_norm": 1.5943361520767212, "learning_rate": 9.249597090740136e-06, "loss": 0.8759, "step": 5701 }, { "epoch": 0.20140492474248198, "grad_norm": 1.6098666191101074, "learning_rate": 9.249295663668274e-06, "loss": 0.8377, "step": 5702 }, { "epoch": 0.2014402465461899, "grad_norm": 1.6595796346664429, "learning_rate": 9.248994180982427e-06, "loss": 0.826, "step": 5703 }, { "epoch": 0.2014755683498978, "grad_norm": 1.5836702585220337, "learning_rate": 9.248692642686545e-06, "loss": 0.8547, "step": 5704 }, { "epoch": 0.20151089015360568, "grad_norm": 1.6192071437835693, "learning_rate": 9.248391048784571e-06, "loss": 0.8458, "step": 5705 }, { "epoch": 0.2015462119573136, "grad_norm": 1.6577805280685425, "learning_rate": 9.248089399280452e-06, "loss": 0.8209, "step": 5706 }, { "epoch": 0.2015815337610215, "grad_norm": 1.6965159177780151, "learning_rate": 9.24778769417814e-06, "loss": 0.798, "step": 5707 }, { "epoch": 0.2016168555647294, "grad_norm": 1.7115288972854614, "learning_rate": 9.247485933481579e-06, "loss": 0.8501, "step": 5708 }, { "epoch": 0.20165217736843732, "grad_norm": 1.628240942955017, "learning_rate": 9.247184117194723e-06, "loss": 0.8423, "step": 5709 }, { "epoch": 0.20168749917214523, "grad_norm": 1.6221669912338257, "learning_rate": 9.246882245321518e-06, "loss": 0.8844, "step": 5710 }, { "epoch": 0.20172282097585315, "grad_norm": 1.6259862184524536, "learning_rate": 9.246580317865918e-06, "loss": 0.833, "step": 5711 }, { "epoch": 0.20175814277956103, "grad_norm": 1.7523096799850464, "learning_rate": 9.246278334831871e-06, "loss": 0.842, "step": 5712 }, { "epoch": 0.20179346458326894, "grad_norm": 1.8161789178848267, "learning_rate": 9.245976296223333e-06, "loss": 0.8636, "step": 5713 }, { "epoch": 0.20182878638697685, "grad_norm": 1.6691665649414062, "learning_rate": 9.245674202044255e-06, "loss": 0.8464, "step": 5714 }, { "epoch": 0.20186410819068476, "grad_norm": 1.5496289730072021, "learning_rate": 9.24537205229859e-06, "loss": 0.8447, "step": 5715 }, { "epoch": 0.20189942999439267, "grad_norm": 1.1967389583587646, "learning_rate": 9.245069846990295e-06, "loss": 0.6124, "step": 5716 }, { "epoch": 0.20193475179810058, "grad_norm": 1.815869688987732, "learning_rate": 9.244767586123323e-06, "loss": 0.8679, "step": 5717 }, { "epoch": 0.20197007360180846, "grad_norm": 1.7033284902572632, "learning_rate": 9.24446526970163e-06, "loss": 0.8471, "step": 5718 }, { "epoch": 0.20200539540551637, "grad_norm": 1.6290974617004395, "learning_rate": 9.244162897729177e-06, "loss": 0.8245, "step": 5719 }, { "epoch": 0.20204071720922429, "grad_norm": 1.626656174659729, "learning_rate": 9.243860470209914e-06, "loss": 0.8334, "step": 5720 }, { "epoch": 0.2020760390129322, "grad_norm": 1.740192174911499, "learning_rate": 9.243557987147804e-06, "loss": 0.8626, "step": 5721 }, { "epoch": 0.2021113608166401, "grad_norm": 1.8884235620498657, "learning_rate": 9.243255448546806e-06, "loss": 0.8582, "step": 5722 }, { "epoch": 0.20214668262034802, "grad_norm": 1.639346957206726, "learning_rate": 9.242952854410877e-06, "loss": 0.8424, "step": 5723 }, { "epoch": 0.20218200442405593, "grad_norm": 1.5612863302230835, "learning_rate": 9.242650204743978e-06, "loss": 0.8099, "step": 5724 }, { "epoch": 0.2022173262277638, "grad_norm": 1.7780084609985352, "learning_rate": 9.242347499550071e-06, "loss": 0.9036, "step": 5725 }, { "epoch": 0.20225264803147172, "grad_norm": 1.6198408603668213, "learning_rate": 9.242044738833119e-06, "loss": 0.8396, "step": 5726 }, { "epoch": 0.20228796983517963, "grad_norm": 1.8196858167648315, "learning_rate": 9.241741922597082e-06, "loss": 0.8851, "step": 5727 }, { "epoch": 0.20232329163888754, "grad_norm": 1.8122929334640503, "learning_rate": 9.241439050845923e-06, "loss": 0.8688, "step": 5728 }, { "epoch": 0.20235861344259545, "grad_norm": 1.6588748693466187, "learning_rate": 9.24113612358361e-06, "loss": 0.8245, "step": 5729 }, { "epoch": 0.20239393524630336, "grad_norm": 1.8544795513153076, "learning_rate": 9.240833140814101e-06, "loss": 0.8662, "step": 5730 }, { "epoch": 0.20242925705001125, "grad_norm": 1.8079848289489746, "learning_rate": 9.240530102541368e-06, "loss": 0.8387, "step": 5731 }, { "epoch": 0.20246457885371916, "grad_norm": 1.7639955282211304, "learning_rate": 9.240227008769372e-06, "loss": 0.8427, "step": 5732 }, { "epoch": 0.20249990065742707, "grad_norm": 1.8801325559616089, "learning_rate": 9.239923859502083e-06, "loss": 0.8642, "step": 5733 }, { "epoch": 0.20253522246113498, "grad_norm": 1.7270832061767578, "learning_rate": 9.239620654743466e-06, "loss": 0.8426, "step": 5734 }, { "epoch": 0.2025705442648429, "grad_norm": 1.7459149360656738, "learning_rate": 9.239317394497494e-06, "loss": 0.8572, "step": 5735 }, { "epoch": 0.2026058660685508, "grad_norm": 1.6722899675369263, "learning_rate": 9.23901407876813e-06, "loss": 0.8397, "step": 5736 }, { "epoch": 0.2026411878722587, "grad_norm": 1.6815192699432373, "learning_rate": 9.238710707559348e-06, "loss": 0.8275, "step": 5737 }, { "epoch": 0.2026765096759666, "grad_norm": 1.6163268089294434, "learning_rate": 9.238407280875118e-06, "loss": 0.8612, "step": 5738 }, { "epoch": 0.2027118314796745, "grad_norm": 1.7943346500396729, "learning_rate": 9.238103798719408e-06, "loss": 0.8992, "step": 5739 }, { "epoch": 0.20274715328338241, "grad_norm": 1.7106285095214844, "learning_rate": 9.237800261096193e-06, "loss": 0.836, "step": 5740 }, { "epoch": 0.20278247508709032, "grad_norm": 1.9107111692428589, "learning_rate": 9.237496668009444e-06, "loss": 0.8528, "step": 5741 }, { "epoch": 0.20281779689079824, "grad_norm": 1.6590036153793335, "learning_rate": 9.237193019463136e-06, "loss": 0.8646, "step": 5742 }, { "epoch": 0.20285311869450615, "grad_norm": 1.8316537141799927, "learning_rate": 9.236889315461242e-06, "loss": 0.891, "step": 5743 }, { "epoch": 0.20288844049821403, "grad_norm": 1.5356727838516235, "learning_rate": 9.236585556007737e-06, "loss": 0.8382, "step": 5744 }, { "epoch": 0.20292376230192194, "grad_norm": 1.637166976928711, "learning_rate": 9.236281741106598e-06, "loss": 0.8038, "step": 5745 }, { "epoch": 0.20295908410562985, "grad_norm": 1.7143281698226929, "learning_rate": 9.235977870761799e-06, "loss": 0.8516, "step": 5746 }, { "epoch": 0.20299440590933776, "grad_norm": 1.7447353601455688, "learning_rate": 9.235673944977318e-06, "loss": 0.8447, "step": 5747 }, { "epoch": 0.20302972771304567, "grad_norm": 1.5919220447540283, "learning_rate": 9.235369963757133e-06, "loss": 0.8435, "step": 5748 }, { "epoch": 0.20306504951675358, "grad_norm": 1.6700646877288818, "learning_rate": 9.23506592710522e-06, "loss": 0.8467, "step": 5749 }, { "epoch": 0.2031003713204615, "grad_norm": 1.5734409093856812, "learning_rate": 9.234761835025562e-06, "loss": 0.8814, "step": 5750 }, { "epoch": 0.20313569312416938, "grad_norm": 1.7907662391662598, "learning_rate": 9.234457687522139e-06, "loss": 0.8514, "step": 5751 }, { "epoch": 0.20317101492787729, "grad_norm": 1.7955964803695679, "learning_rate": 9.234153484598927e-06, "loss": 0.8135, "step": 5752 }, { "epoch": 0.2032063367315852, "grad_norm": 2.236650228500366, "learning_rate": 9.23384922625991e-06, "loss": 0.9007, "step": 5753 }, { "epoch": 0.2032416585352931, "grad_norm": 1.761399269104004, "learning_rate": 9.233544912509071e-06, "loss": 0.8643, "step": 5754 }, { "epoch": 0.20327698033900102, "grad_norm": 1.7594921588897705, "learning_rate": 9.233240543350393e-06, "loss": 0.8534, "step": 5755 }, { "epoch": 0.20331230214270893, "grad_norm": 1.67001211643219, "learning_rate": 9.232936118787858e-06, "loss": 0.8347, "step": 5756 }, { "epoch": 0.2033476239464168, "grad_norm": 1.8903999328613281, "learning_rate": 9.232631638825449e-06, "loss": 0.8659, "step": 5757 }, { "epoch": 0.20338294575012472, "grad_norm": 1.7766618728637695, "learning_rate": 9.232327103467155e-06, "loss": 0.8705, "step": 5758 }, { "epoch": 0.20341826755383263, "grad_norm": 1.7053090333938599, "learning_rate": 9.232022512716959e-06, "loss": 0.8504, "step": 5759 }, { "epoch": 0.20345358935754054, "grad_norm": 1.9624662399291992, "learning_rate": 9.231717866578846e-06, "loss": 0.8665, "step": 5760 }, { "epoch": 0.20348891116124845, "grad_norm": 1.8110816478729248, "learning_rate": 9.231413165056808e-06, "loss": 0.8537, "step": 5761 }, { "epoch": 0.20352423296495636, "grad_norm": 1.8523222208023071, "learning_rate": 9.231108408154827e-06, "loss": 0.859, "step": 5762 }, { "epoch": 0.20355955476866427, "grad_norm": 1.8422338962554932, "learning_rate": 9.230803595876895e-06, "loss": 0.8553, "step": 5763 }, { "epoch": 0.20359487657237216, "grad_norm": 1.7073333263397217, "learning_rate": 9.230498728227004e-06, "loss": 0.8247, "step": 5764 }, { "epoch": 0.20363019837608007, "grad_norm": 1.641064167022705, "learning_rate": 9.230193805209138e-06, "loss": 0.8417, "step": 5765 }, { "epoch": 0.20366552017978798, "grad_norm": 1.5746040344238281, "learning_rate": 9.229888826827291e-06, "loss": 0.8406, "step": 5766 }, { "epoch": 0.2037008419834959, "grad_norm": 1.6519078016281128, "learning_rate": 9.229583793085455e-06, "loss": 0.8721, "step": 5767 }, { "epoch": 0.2037361637872038, "grad_norm": 1.6892644166946411, "learning_rate": 9.22927870398762e-06, "loss": 0.903, "step": 5768 }, { "epoch": 0.2037714855909117, "grad_norm": 2.1922967433929443, "learning_rate": 9.228973559537782e-06, "loss": 0.846, "step": 5769 }, { "epoch": 0.2038068073946196, "grad_norm": 1.6563817262649536, "learning_rate": 9.228668359739932e-06, "loss": 0.8476, "step": 5770 }, { "epoch": 0.2038421291983275, "grad_norm": 1.635582447052002, "learning_rate": 9.228363104598066e-06, "loss": 0.858, "step": 5771 }, { "epoch": 0.20387745100203541, "grad_norm": 1.7869840860366821, "learning_rate": 9.228057794116178e-06, "loss": 0.8829, "step": 5772 }, { "epoch": 0.20391277280574333, "grad_norm": 1.6430155038833618, "learning_rate": 9.227752428298265e-06, "loss": 0.8216, "step": 5773 }, { "epoch": 0.20394809460945124, "grad_norm": 1.5908482074737549, "learning_rate": 9.227447007148322e-06, "loss": 0.8417, "step": 5774 }, { "epoch": 0.20398341641315915, "grad_norm": 1.63456130027771, "learning_rate": 9.227141530670347e-06, "loss": 0.8763, "step": 5775 }, { "epoch": 0.20401873821686706, "grad_norm": 1.7453818321228027, "learning_rate": 9.22683599886834e-06, "loss": 0.8456, "step": 5776 }, { "epoch": 0.20405406002057494, "grad_norm": 1.3865158557891846, "learning_rate": 9.226530411746298e-06, "loss": 0.605, "step": 5777 }, { "epoch": 0.20408938182428285, "grad_norm": 1.7119088172912598, "learning_rate": 9.226224769308219e-06, "loss": 0.8941, "step": 5778 }, { "epoch": 0.20412470362799076, "grad_norm": 1.6436370611190796, "learning_rate": 9.225919071558105e-06, "loss": 0.8037, "step": 5779 }, { "epoch": 0.20416002543169867, "grad_norm": 1.6813877820968628, "learning_rate": 9.225613318499956e-06, "loss": 0.833, "step": 5780 }, { "epoch": 0.20419534723540658, "grad_norm": 1.6713223457336426, "learning_rate": 9.225307510137775e-06, "loss": 0.8551, "step": 5781 }, { "epoch": 0.2042306690391145, "grad_norm": 1.7915440797805786, "learning_rate": 9.225001646475564e-06, "loss": 0.8416, "step": 5782 }, { "epoch": 0.20426599084282238, "grad_norm": 1.698873519897461, "learning_rate": 9.224695727517325e-06, "loss": 0.8325, "step": 5783 }, { "epoch": 0.2043013126465303, "grad_norm": 1.7303366661071777, "learning_rate": 9.224389753267062e-06, "loss": 0.8189, "step": 5784 }, { "epoch": 0.2043366344502382, "grad_norm": 1.7708970308303833, "learning_rate": 9.224083723728781e-06, "loss": 0.8561, "step": 5785 }, { "epoch": 0.2043719562539461, "grad_norm": 0.9678504467010498, "learning_rate": 9.223777638906486e-06, "loss": 0.5623, "step": 5786 }, { "epoch": 0.20440727805765402, "grad_norm": 1.6139354705810547, "learning_rate": 9.223471498804184e-06, "loss": 0.8512, "step": 5787 }, { "epoch": 0.20444259986136193, "grad_norm": 1.7787232398986816, "learning_rate": 9.22316530342588e-06, "loss": 0.873, "step": 5788 }, { "epoch": 0.20447792166506984, "grad_norm": 1.6735296249389648, "learning_rate": 9.222859052775583e-06, "loss": 0.8615, "step": 5789 }, { "epoch": 0.20451324346877772, "grad_norm": 1.7304011583328247, "learning_rate": 9.222552746857299e-06, "loss": 0.8821, "step": 5790 }, { "epoch": 0.20454856527248563, "grad_norm": 1.955115795135498, "learning_rate": 9.22224638567504e-06, "loss": 0.8448, "step": 5791 }, { "epoch": 0.20458388707619354, "grad_norm": 1.763134241104126, "learning_rate": 9.221939969232814e-06, "loss": 0.8521, "step": 5792 }, { "epoch": 0.20461920887990145, "grad_norm": 1.8017114400863647, "learning_rate": 9.22163349753463e-06, "loss": 0.8256, "step": 5793 }, { "epoch": 0.20465453068360936, "grad_norm": 1.7151238918304443, "learning_rate": 9.2213269705845e-06, "loss": 0.8562, "step": 5794 }, { "epoch": 0.20468985248731728, "grad_norm": 1.6156647205352783, "learning_rate": 9.22102038838644e-06, "loss": 0.8494, "step": 5795 }, { "epoch": 0.20472517429102516, "grad_norm": 2.054666519165039, "learning_rate": 9.220713750944454e-06, "loss": 0.8654, "step": 5796 }, { "epoch": 0.20476049609473307, "grad_norm": 2.558950901031494, "learning_rate": 9.220407058262561e-06, "loss": 0.8488, "step": 5797 }, { "epoch": 0.20479581789844098, "grad_norm": 1.602168321609497, "learning_rate": 9.220100310344774e-06, "loss": 0.8629, "step": 5798 }, { "epoch": 0.2048311397021489, "grad_norm": 1.5811645984649658, "learning_rate": 9.21979350719511e-06, "loss": 0.8283, "step": 5799 }, { "epoch": 0.2048664615058568, "grad_norm": 1.6990010738372803, "learning_rate": 9.21948664881758e-06, "loss": 0.8619, "step": 5800 }, { "epoch": 0.2049017833095647, "grad_norm": 3.352092981338501, "learning_rate": 9.219179735216201e-06, "loss": 0.8655, "step": 5801 }, { "epoch": 0.20493710511327262, "grad_norm": 1.6794782876968384, "learning_rate": 9.218872766394991e-06, "loss": 0.8441, "step": 5802 }, { "epoch": 0.2049724269169805, "grad_norm": 1.6256835460662842, "learning_rate": 9.218565742357969e-06, "loss": 0.8491, "step": 5803 }, { "epoch": 0.20500774872068842, "grad_norm": 1.8244903087615967, "learning_rate": 9.21825866310915e-06, "loss": 0.8993, "step": 5804 }, { "epoch": 0.20504307052439633, "grad_norm": 1.901275396347046, "learning_rate": 9.217951528652554e-06, "loss": 0.8689, "step": 5805 }, { "epoch": 0.20507839232810424, "grad_norm": 1.6197034120559692, "learning_rate": 9.217644338992203e-06, "loss": 0.8633, "step": 5806 }, { "epoch": 0.20511371413181215, "grad_norm": 1.6965852975845337, "learning_rate": 9.217337094132115e-06, "loss": 0.8327, "step": 5807 }, { "epoch": 0.20514903593552006, "grad_norm": 1.8761273622512817, "learning_rate": 9.217029794076311e-06, "loss": 0.8396, "step": 5808 }, { "epoch": 0.20518435773922794, "grad_norm": 1.6745920181274414, "learning_rate": 9.216722438828813e-06, "loss": 0.8516, "step": 5809 }, { "epoch": 0.20521967954293585, "grad_norm": 1.941945195198059, "learning_rate": 9.216415028393647e-06, "loss": 0.8599, "step": 5810 }, { "epoch": 0.20525500134664376, "grad_norm": 1.9898444414138794, "learning_rate": 9.216107562774833e-06, "loss": 0.8674, "step": 5811 }, { "epoch": 0.20529032315035167, "grad_norm": 1.6318713426589966, "learning_rate": 9.215800041976396e-06, "loss": 0.8337, "step": 5812 }, { "epoch": 0.20532564495405958, "grad_norm": 1.6695011854171753, "learning_rate": 9.215492466002359e-06, "loss": 0.8835, "step": 5813 }, { "epoch": 0.2053609667577675, "grad_norm": 1.8745664358139038, "learning_rate": 9.21518483485675e-06, "loss": 0.8666, "step": 5814 }, { "epoch": 0.2053962885614754, "grad_norm": 1.794775128364563, "learning_rate": 9.214877148543595e-06, "loss": 0.8803, "step": 5815 }, { "epoch": 0.2054316103651833, "grad_norm": 1.9735932350158691, "learning_rate": 9.214569407066918e-06, "loss": 0.8153, "step": 5816 }, { "epoch": 0.2054669321688912, "grad_norm": 1.6896417140960693, "learning_rate": 9.21426161043075e-06, "loss": 0.8442, "step": 5817 }, { "epoch": 0.2055022539725991, "grad_norm": 1.54423987865448, "learning_rate": 9.21395375863912e-06, "loss": 0.844, "step": 5818 }, { "epoch": 0.20553757577630702, "grad_norm": 1.6225095987319946, "learning_rate": 9.213645851696052e-06, "loss": 0.82, "step": 5819 }, { "epoch": 0.20557289758001493, "grad_norm": 1.647756814956665, "learning_rate": 9.213337889605582e-06, "loss": 0.8565, "step": 5820 }, { "epoch": 0.20560821938372284, "grad_norm": 1.717777132987976, "learning_rate": 9.213029872371737e-06, "loss": 0.8629, "step": 5821 }, { "epoch": 0.20564354118743072, "grad_norm": 1.7709652185440063, "learning_rate": 9.212721799998548e-06, "loss": 0.8607, "step": 5822 }, { "epoch": 0.20567886299113863, "grad_norm": 1.808130145072937, "learning_rate": 9.212413672490049e-06, "loss": 0.8762, "step": 5823 }, { "epoch": 0.20571418479484654, "grad_norm": 1.9820936918258667, "learning_rate": 9.212105489850271e-06, "loss": 0.8621, "step": 5824 }, { "epoch": 0.20574950659855445, "grad_norm": 1.6984390020370483, "learning_rate": 9.211797252083248e-06, "loss": 0.8559, "step": 5825 }, { "epoch": 0.20578482840226237, "grad_norm": 1.7643688917160034, "learning_rate": 9.211488959193015e-06, "loss": 0.8371, "step": 5826 }, { "epoch": 0.20582015020597028, "grad_norm": 1.9113163948059082, "learning_rate": 9.211180611183607e-06, "loss": 0.8687, "step": 5827 }, { "epoch": 0.2058554720096782, "grad_norm": 1.6173590421676636, "learning_rate": 9.210872208059056e-06, "loss": 0.8695, "step": 5828 }, { "epoch": 0.20589079381338607, "grad_norm": 1.848657488822937, "learning_rate": 9.210563749823404e-06, "loss": 0.8977, "step": 5829 }, { "epoch": 0.20592611561709398, "grad_norm": 1.6907801628112793, "learning_rate": 9.210255236480685e-06, "loss": 0.8271, "step": 5830 }, { "epoch": 0.2059614374208019, "grad_norm": 1.7825610637664795, "learning_rate": 9.209946668034935e-06, "loss": 0.8175, "step": 5831 }, { "epoch": 0.2059967592245098, "grad_norm": 1.819852352142334, "learning_rate": 9.209638044490195e-06, "loss": 0.8573, "step": 5832 }, { "epoch": 0.2060320810282177, "grad_norm": 1.1300092935562134, "learning_rate": 9.209329365850504e-06, "loss": 0.6261, "step": 5833 }, { "epoch": 0.20606740283192562, "grad_norm": 1.7652429342269897, "learning_rate": 9.2090206321199e-06, "loss": 0.8882, "step": 5834 }, { "epoch": 0.2061027246356335, "grad_norm": 1.8960607051849365, "learning_rate": 9.208711843302428e-06, "loss": 0.8294, "step": 5835 }, { "epoch": 0.20613804643934142, "grad_norm": 1.6511802673339844, "learning_rate": 9.208402999402125e-06, "loss": 0.856, "step": 5836 }, { "epoch": 0.20617336824304933, "grad_norm": 1.6304314136505127, "learning_rate": 9.208094100423035e-06, "loss": 0.8386, "step": 5837 }, { "epoch": 0.20620869004675724, "grad_norm": 2.001643419265747, "learning_rate": 9.2077851463692e-06, "loss": 0.8487, "step": 5838 }, { "epoch": 0.20624401185046515, "grad_norm": 1.8197449445724487, "learning_rate": 9.207476137244664e-06, "loss": 0.8642, "step": 5839 }, { "epoch": 0.20627933365417306, "grad_norm": 1.8384616374969482, "learning_rate": 9.207167073053472e-06, "loss": 0.8529, "step": 5840 }, { "epoch": 0.20631465545788097, "grad_norm": 1.7771992683410645, "learning_rate": 9.206857953799667e-06, "loss": 0.8334, "step": 5841 }, { "epoch": 0.20634997726158885, "grad_norm": 1.7242720127105713, "learning_rate": 9.206548779487296e-06, "loss": 0.8568, "step": 5842 }, { "epoch": 0.20638529906529676, "grad_norm": 1.7852452993392944, "learning_rate": 9.206239550120406e-06, "loss": 0.9094, "step": 5843 }, { "epoch": 0.20642062086900467, "grad_norm": 1.7226587533950806, "learning_rate": 9.205930265703044e-06, "loss": 0.8258, "step": 5844 }, { "epoch": 0.20645594267271258, "grad_norm": 1.8565949201583862, "learning_rate": 9.205620926239256e-06, "loss": 0.8251, "step": 5845 }, { "epoch": 0.2064912644764205, "grad_norm": 1.9198704957962036, "learning_rate": 9.205311531733093e-06, "loss": 0.8766, "step": 5846 }, { "epoch": 0.2065265862801284, "grad_norm": 1.840665578842163, "learning_rate": 9.205002082188603e-06, "loss": 0.8366, "step": 5847 }, { "epoch": 0.2065619080838363, "grad_norm": 1.5562057495117188, "learning_rate": 9.204692577609836e-06, "loss": 0.8441, "step": 5848 }, { "epoch": 0.2065972298875442, "grad_norm": 1.6717876195907593, "learning_rate": 9.204383018000842e-06, "loss": 0.8404, "step": 5849 }, { "epoch": 0.2066325516912521, "grad_norm": 1.7050590515136719, "learning_rate": 9.204073403365675e-06, "loss": 0.8711, "step": 5850 }, { "epoch": 0.20666787349496002, "grad_norm": 1.7074778079986572, "learning_rate": 9.203763733708384e-06, "loss": 0.8208, "step": 5851 }, { "epoch": 0.20670319529866793, "grad_norm": 1.6860203742980957, "learning_rate": 9.203454009033025e-06, "loss": 0.872, "step": 5852 }, { "epoch": 0.20673851710237584, "grad_norm": 1.669791340827942, "learning_rate": 9.20314422934365e-06, "loss": 0.8258, "step": 5853 }, { "epoch": 0.20677383890608375, "grad_norm": 1.564041256904602, "learning_rate": 9.202834394644313e-06, "loss": 0.8464, "step": 5854 }, { "epoch": 0.20680916070979163, "grad_norm": 1.735215425491333, "learning_rate": 9.202524504939069e-06, "loss": 0.8062, "step": 5855 }, { "epoch": 0.20684448251349954, "grad_norm": 1.7852932214736938, "learning_rate": 9.202214560231975e-06, "loss": 0.8565, "step": 5856 }, { "epoch": 0.20687980431720746, "grad_norm": 1.7571725845336914, "learning_rate": 9.201904560527085e-06, "loss": 0.8534, "step": 5857 }, { "epoch": 0.20691512612091537, "grad_norm": 1.759315848350525, "learning_rate": 9.20159450582846e-06, "loss": 0.8611, "step": 5858 }, { "epoch": 0.20695044792462328, "grad_norm": 1.7278318405151367, "learning_rate": 9.201284396140154e-06, "loss": 0.8336, "step": 5859 }, { "epoch": 0.2069857697283312, "grad_norm": 1.7735393047332764, "learning_rate": 9.20097423146623e-06, "loss": 0.8387, "step": 5860 }, { "epoch": 0.20702109153203907, "grad_norm": 2.0248842239379883, "learning_rate": 9.200664011810743e-06, "loss": 0.8703, "step": 5861 }, { "epoch": 0.20705641333574698, "grad_norm": 1.719590425491333, "learning_rate": 9.200353737177757e-06, "loss": 0.818, "step": 5862 }, { "epoch": 0.2070917351394549, "grad_norm": 1.8615927696228027, "learning_rate": 9.200043407571328e-06, "loss": 0.8666, "step": 5863 }, { "epoch": 0.2071270569431628, "grad_norm": 1.5829269886016846, "learning_rate": 9.199733022995523e-06, "loss": 0.8318, "step": 5864 }, { "epoch": 0.2071623787468707, "grad_norm": 1.1629918813705444, "learning_rate": 9.1994225834544e-06, "loss": 0.6099, "step": 5865 }, { "epoch": 0.20719770055057862, "grad_norm": 1.9022819995880127, "learning_rate": 9.199112088952025e-06, "loss": 0.8809, "step": 5866 }, { "epoch": 0.20723302235428653, "grad_norm": 1.8653477430343628, "learning_rate": 9.198801539492459e-06, "loss": 0.8913, "step": 5867 }, { "epoch": 0.20726834415799442, "grad_norm": 1.5591238737106323, "learning_rate": 9.198490935079768e-06, "loss": 0.8253, "step": 5868 }, { "epoch": 0.20730366596170233, "grad_norm": 1.760748267173767, "learning_rate": 9.198180275718018e-06, "loss": 0.841, "step": 5869 }, { "epoch": 0.20733898776541024, "grad_norm": 1.8204927444458008, "learning_rate": 9.197869561411273e-06, "loss": 0.8835, "step": 5870 }, { "epoch": 0.20737430956911815, "grad_norm": 1.6602740287780762, "learning_rate": 9.197558792163601e-06, "loss": 0.8536, "step": 5871 }, { "epoch": 0.20740963137282606, "grad_norm": 1.7192093133926392, "learning_rate": 9.197247967979068e-06, "loss": 0.8407, "step": 5872 }, { "epoch": 0.20744495317653397, "grad_norm": 1.8487379550933838, "learning_rate": 9.196937088861743e-06, "loss": 0.8812, "step": 5873 }, { "epoch": 0.20748027498024185, "grad_norm": 1.586553692817688, "learning_rate": 9.196626154815694e-06, "loss": 0.8389, "step": 5874 }, { "epoch": 0.20751559678394976, "grad_norm": 1.7064738273620605, "learning_rate": 9.196315165844988e-06, "loss": 0.8624, "step": 5875 }, { "epoch": 0.20755091858765767, "grad_norm": 1.6154959201812744, "learning_rate": 9.196004121953702e-06, "loss": 0.8558, "step": 5876 }, { "epoch": 0.20758624039136558, "grad_norm": 1.848044514656067, "learning_rate": 9.195693023145902e-06, "loss": 0.8239, "step": 5877 }, { "epoch": 0.2076215621950735, "grad_norm": 1.8510302305221558, "learning_rate": 9.195381869425659e-06, "loss": 0.8788, "step": 5878 }, { "epoch": 0.2076568839987814, "grad_norm": 1.780007004737854, "learning_rate": 9.195070660797047e-06, "loss": 0.8818, "step": 5879 }, { "epoch": 0.20769220580248932, "grad_norm": 1.8582499027252197, "learning_rate": 9.19475939726414e-06, "loss": 0.9193, "step": 5880 }, { "epoch": 0.2077275276061972, "grad_norm": 2.06428861618042, "learning_rate": 9.194448078831011e-06, "loss": 0.8059, "step": 5881 }, { "epoch": 0.2077628494099051, "grad_norm": 2.416426420211792, "learning_rate": 9.194136705501732e-06, "loss": 0.8686, "step": 5882 }, { "epoch": 0.20779817121361302, "grad_norm": 1.726025938987732, "learning_rate": 9.19382527728038e-06, "loss": 0.8691, "step": 5883 }, { "epoch": 0.20783349301732093, "grad_norm": 1.5374548435211182, "learning_rate": 9.193513794171032e-06, "loss": 0.8196, "step": 5884 }, { "epoch": 0.20786881482102884, "grad_norm": 1.910395622253418, "learning_rate": 9.193202256177765e-06, "loss": 0.8673, "step": 5885 }, { "epoch": 0.20790413662473675, "grad_norm": 1.9426113367080688, "learning_rate": 9.192890663304654e-06, "loss": 0.8485, "step": 5886 }, { "epoch": 0.20793945842844466, "grad_norm": 1.719195008277893, "learning_rate": 9.192579015555779e-06, "loss": 0.8689, "step": 5887 }, { "epoch": 0.20797478023215255, "grad_norm": 1.6143509149551392, "learning_rate": 9.192267312935216e-06, "loss": 0.8486, "step": 5888 }, { "epoch": 0.20801010203586046, "grad_norm": 1.5807766914367676, "learning_rate": 9.191955555447047e-06, "loss": 0.8198, "step": 5889 }, { "epoch": 0.20804542383956837, "grad_norm": 1.6006784439086914, "learning_rate": 9.191643743095353e-06, "loss": 0.8595, "step": 5890 }, { "epoch": 0.20808074564327628, "grad_norm": 1.626648187637329, "learning_rate": 9.191331875884214e-06, "loss": 0.8033, "step": 5891 }, { "epoch": 0.2081160674469842, "grad_norm": 1.7384883165359497, "learning_rate": 9.191019953817709e-06, "loss": 0.8453, "step": 5892 }, { "epoch": 0.2081513892506921, "grad_norm": 2.8266916275024414, "learning_rate": 9.190707976899924e-06, "loss": 0.8705, "step": 5893 }, { "epoch": 0.20818671105439998, "grad_norm": 1.6477466821670532, "learning_rate": 9.190395945134942e-06, "loss": 0.868, "step": 5894 }, { "epoch": 0.2082220328581079, "grad_norm": 1.6666055917739868, "learning_rate": 9.190083858526844e-06, "loss": 0.8206, "step": 5895 }, { "epoch": 0.2082573546618158, "grad_norm": 1.601347804069519, "learning_rate": 9.189771717079716e-06, "loss": 0.8166, "step": 5896 }, { "epoch": 0.2082926764655237, "grad_norm": 1.8789492845535278, "learning_rate": 9.189459520797644e-06, "loss": 0.8923, "step": 5897 }, { "epoch": 0.20832799826923162, "grad_norm": 1.9191731214523315, "learning_rate": 9.189147269684713e-06, "loss": 0.8711, "step": 5898 }, { "epoch": 0.20836332007293953, "grad_norm": 1.822627305984497, "learning_rate": 9.188834963745011e-06, "loss": 0.8662, "step": 5899 }, { "epoch": 0.20839864187664744, "grad_norm": 1.522153377532959, "learning_rate": 9.188522602982622e-06, "loss": 0.8347, "step": 5900 }, { "epoch": 0.20843396368035533, "grad_norm": 1.7643917798995972, "learning_rate": 9.188210187401639e-06, "loss": 0.8256, "step": 5901 }, { "epoch": 0.20846928548406324, "grad_norm": 2.0169451236724854, "learning_rate": 9.187897717006147e-06, "loss": 0.8386, "step": 5902 }, { "epoch": 0.20850460728777115, "grad_norm": 1.826959252357483, "learning_rate": 9.187585191800237e-06, "loss": 0.8168, "step": 5903 }, { "epoch": 0.20853992909147906, "grad_norm": 1.6197309494018555, "learning_rate": 9.187272611787999e-06, "loss": 0.8089, "step": 5904 }, { "epoch": 0.20857525089518697, "grad_norm": 1.9074223041534424, "learning_rate": 9.186959976973524e-06, "loss": 0.8823, "step": 5905 }, { "epoch": 0.20861057269889488, "grad_norm": 1.7359696626663208, "learning_rate": 9.186647287360906e-06, "loss": 0.8152, "step": 5906 }, { "epoch": 0.20864589450260276, "grad_norm": 2.045571804046631, "learning_rate": 9.186334542954233e-06, "loss": 0.8585, "step": 5907 }, { "epoch": 0.20868121630631067, "grad_norm": 1.6428420543670654, "learning_rate": 9.186021743757602e-06, "loss": 0.8515, "step": 5908 }, { "epoch": 0.20871653811001858, "grad_norm": 1.741131067276001, "learning_rate": 9.185708889775105e-06, "loss": 0.8604, "step": 5909 }, { "epoch": 0.2087518599137265, "grad_norm": 1.8287822008132935, "learning_rate": 9.185395981010837e-06, "loss": 0.8685, "step": 5910 }, { "epoch": 0.2087871817174344, "grad_norm": 1.6955127716064453, "learning_rate": 9.18508301746889e-06, "loss": 0.8151, "step": 5911 }, { "epoch": 0.20882250352114232, "grad_norm": 1.7297073602676392, "learning_rate": 9.184769999153367e-06, "loss": 0.8225, "step": 5912 }, { "epoch": 0.20885782532485023, "grad_norm": 1.7468085289001465, "learning_rate": 9.18445692606836e-06, "loss": 0.8388, "step": 5913 }, { "epoch": 0.2088931471285581, "grad_norm": 1.7296808958053589, "learning_rate": 9.184143798217966e-06, "loss": 0.8788, "step": 5914 }, { "epoch": 0.20892846893226602, "grad_norm": 1.6828807592391968, "learning_rate": 9.183830615606286e-06, "loss": 0.8399, "step": 5915 }, { "epoch": 0.20896379073597393, "grad_norm": 1.6641989946365356, "learning_rate": 9.183517378237417e-06, "loss": 0.8417, "step": 5916 }, { "epoch": 0.20899911253968184, "grad_norm": 1.7872354984283447, "learning_rate": 9.18320408611546e-06, "loss": 0.8469, "step": 5917 }, { "epoch": 0.20903443434338975, "grad_norm": 1.7797260284423828, "learning_rate": 9.182890739244512e-06, "loss": 0.8606, "step": 5918 }, { "epoch": 0.20906975614709766, "grad_norm": 1.5945883989334106, "learning_rate": 9.182577337628677e-06, "loss": 0.8404, "step": 5919 }, { "epoch": 0.20910507795080555, "grad_norm": 1.738352656364441, "learning_rate": 9.182263881272058e-06, "loss": 0.8789, "step": 5920 }, { "epoch": 0.20914039975451346, "grad_norm": 1.6361809968948364, "learning_rate": 9.181950370178754e-06, "loss": 0.8487, "step": 5921 }, { "epoch": 0.20917572155822137, "grad_norm": 1.7533750534057617, "learning_rate": 9.181636804352868e-06, "loss": 0.8359, "step": 5922 }, { "epoch": 0.20921104336192928, "grad_norm": 1.685958981513977, "learning_rate": 9.181323183798509e-06, "loss": 0.8396, "step": 5923 }, { "epoch": 0.2092463651656372, "grad_norm": 1.7405675649642944, "learning_rate": 9.181009508519776e-06, "loss": 0.8874, "step": 5924 }, { "epoch": 0.2092816869693451, "grad_norm": 1.6964665651321411, "learning_rate": 9.180695778520777e-06, "loss": 0.8912, "step": 5925 }, { "epoch": 0.209317008773053, "grad_norm": 1.7219876050949097, "learning_rate": 9.180381993805616e-06, "loss": 0.8193, "step": 5926 }, { "epoch": 0.2093523305767609, "grad_norm": 1.8734127283096313, "learning_rate": 9.180068154378405e-06, "loss": 0.8734, "step": 5927 }, { "epoch": 0.2093876523804688, "grad_norm": 1.003966212272644, "learning_rate": 9.179754260243244e-06, "loss": 0.5969, "step": 5928 }, { "epoch": 0.2094229741841767, "grad_norm": 1.5909134149551392, "learning_rate": 9.179440311404246e-06, "loss": 0.8554, "step": 5929 }, { "epoch": 0.20945829598788462, "grad_norm": 3.8491952419281006, "learning_rate": 9.17912630786552e-06, "loss": 0.8481, "step": 5930 }, { "epoch": 0.20949361779159253, "grad_norm": 1.7686482667922974, "learning_rate": 9.178812249631174e-06, "loss": 0.8542, "step": 5931 }, { "epoch": 0.20952893959530045, "grad_norm": 1.8416262865066528, "learning_rate": 9.178498136705319e-06, "loss": 0.8545, "step": 5932 }, { "epoch": 0.20956426139900833, "grad_norm": 1.644454002380371, "learning_rate": 9.178183969092065e-06, "loss": 0.8628, "step": 5933 }, { "epoch": 0.20959958320271624, "grad_norm": 1.7457889318466187, "learning_rate": 9.177869746795525e-06, "loss": 0.8519, "step": 5934 }, { "epoch": 0.20963490500642415, "grad_norm": 1.675180435180664, "learning_rate": 9.177555469819813e-06, "loss": 0.8532, "step": 5935 }, { "epoch": 0.20967022681013206, "grad_norm": 1.615103006362915, "learning_rate": 9.177241138169037e-06, "loss": 0.8354, "step": 5936 }, { "epoch": 0.20970554861383997, "grad_norm": 1.626003623008728, "learning_rate": 9.176926751847316e-06, "loss": 0.8513, "step": 5937 }, { "epoch": 0.20974087041754788, "grad_norm": 2.321148157119751, "learning_rate": 9.176612310858763e-06, "loss": 0.8147, "step": 5938 }, { "epoch": 0.2097761922212558, "grad_norm": 1.6698278188705444, "learning_rate": 9.176297815207493e-06, "loss": 0.8145, "step": 5939 }, { "epoch": 0.20981151402496367, "grad_norm": 1.6940897703170776, "learning_rate": 9.175983264897621e-06, "loss": 0.849, "step": 5940 }, { "epoch": 0.20984683582867159, "grad_norm": 1.7824875116348267, "learning_rate": 9.175668659933266e-06, "loss": 0.8306, "step": 5941 }, { "epoch": 0.2098821576323795, "grad_norm": 2.084200143814087, "learning_rate": 9.175354000318545e-06, "loss": 0.8428, "step": 5942 }, { "epoch": 0.2099174794360874, "grad_norm": 1.6199901103973389, "learning_rate": 9.175039286057574e-06, "loss": 0.8555, "step": 5943 }, { "epoch": 0.20995280123979532, "grad_norm": 1.6863210201263428, "learning_rate": 9.174724517154476e-06, "loss": 0.8292, "step": 5944 }, { "epoch": 0.20998812304350323, "grad_norm": 1.7441803216934204, "learning_rate": 9.174409693613367e-06, "loss": 0.8165, "step": 5945 }, { "epoch": 0.2100234448472111, "grad_norm": 1.8282524347305298, "learning_rate": 9.174094815438369e-06, "loss": 0.8763, "step": 5946 }, { "epoch": 0.21005876665091902, "grad_norm": 1.5476313829421997, "learning_rate": 9.173779882633602e-06, "loss": 0.8315, "step": 5947 }, { "epoch": 0.21009408845462693, "grad_norm": 2.322282314300537, "learning_rate": 9.17346489520319e-06, "loss": 0.8164, "step": 5948 }, { "epoch": 0.21012941025833484, "grad_norm": 2.0103683471679688, "learning_rate": 9.173149853151254e-06, "loss": 0.854, "step": 5949 }, { "epoch": 0.21016473206204275, "grad_norm": 1.8798905611038208, "learning_rate": 9.172834756481914e-06, "loss": 0.841, "step": 5950 }, { "epoch": 0.21020005386575066, "grad_norm": 2.000751256942749, "learning_rate": 9.172519605199299e-06, "loss": 0.8701, "step": 5951 }, { "epoch": 0.21023537566945857, "grad_norm": 1.6621078252792358, "learning_rate": 9.172204399307534e-06, "loss": 0.8817, "step": 5952 }, { "epoch": 0.21027069747316646, "grad_norm": 1.8487967252731323, "learning_rate": 9.171889138810739e-06, "loss": 0.8361, "step": 5953 }, { "epoch": 0.21030601927687437, "grad_norm": 1.748806357383728, "learning_rate": 9.171573823713044e-06, "loss": 0.8283, "step": 5954 }, { "epoch": 0.21034134108058228, "grad_norm": 1.7555960416793823, "learning_rate": 9.171258454018577e-06, "loss": 0.8305, "step": 5955 }, { "epoch": 0.2103766628842902, "grad_norm": 1.6617497205734253, "learning_rate": 9.170943029731461e-06, "loss": 0.8373, "step": 5956 }, { "epoch": 0.2104119846879981, "grad_norm": 1.7764474153518677, "learning_rate": 9.170627550855828e-06, "loss": 0.8274, "step": 5957 }, { "epoch": 0.210447306491706, "grad_norm": 1.7403143644332886, "learning_rate": 9.170312017395804e-06, "loss": 0.8628, "step": 5958 }, { "epoch": 0.2104826282954139, "grad_norm": 1.6727150678634644, "learning_rate": 9.169996429355522e-06, "loss": 0.8549, "step": 5959 }, { "epoch": 0.2105179500991218, "grad_norm": 2.0292932987213135, "learning_rate": 9.16968078673911e-06, "loss": 0.8392, "step": 5960 }, { "epoch": 0.21055327190282971, "grad_norm": 1.5919852256774902, "learning_rate": 9.1693650895507e-06, "loss": 0.8443, "step": 5961 }, { "epoch": 0.21058859370653762, "grad_norm": 1.7257411479949951, "learning_rate": 9.169049337794422e-06, "loss": 0.8974, "step": 5962 }, { "epoch": 0.21062391551024554, "grad_norm": 1.6632219552993774, "learning_rate": 9.168733531474412e-06, "loss": 0.8643, "step": 5963 }, { "epoch": 0.21065923731395345, "grad_norm": 1.6561685800552368, "learning_rate": 9.1684176705948e-06, "loss": 0.8467, "step": 5964 }, { "epoch": 0.21069455911766136, "grad_norm": 1.9154386520385742, "learning_rate": 9.168101755159723e-06, "loss": 0.8763, "step": 5965 }, { "epoch": 0.21072988092136924, "grad_norm": 1.7577259540557861, "learning_rate": 9.16778578517331e-06, "loss": 0.8327, "step": 5966 }, { "epoch": 0.21076520272507715, "grad_norm": 1.8287513256072998, "learning_rate": 9.167469760639702e-06, "loss": 0.8398, "step": 5967 }, { "epoch": 0.21080052452878506, "grad_norm": 1.809733510017395, "learning_rate": 9.167153681563034e-06, "loss": 0.8196, "step": 5968 }, { "epoch": 0.21083584633249297, "grad_norm": 2.3577139377593994, "learning_rate": 9.166837547947441e-06, "loss": 0.8683, "step": 5969 }, { "epoch": 0.21087116813620088, "grad_norm": 1.6887834072113037, "learning_rate": 9.166521359797062e-06, "loss": 0.8633, "step": 5970 }, { "epoch": 0.2109064899399088, "grad_norm": 1.7917366027832031, "learning_rate": 9.166205117116034e-06, "loss": 0.855, "step": 5971 }, { "epoch": 0.21094181174361668, "grad_norm": 2.0739612579345703, "learning_rate": 9.165888819908498e-06, "loss": 0.8061, "step": 5972 }, { "epoch": 0.21097713354732459, "grad_norm": 1.7316462993621826, "learning_rate": 9.165572468178589e-06, "loss": 0.8585, "step": 5973 }, { "epoch": 0.2110124553510325, "grad_norm": 1.7519445419311523, "learning_rate": 9.165256061930455e-06, "loss": 0.8344, "step": 5974 }, { "epoch": 0.2110477771547404, "grad_norm": 1.78372323513031, "learning_rate": 9.16493960116823e-06, "loss": 0.8838, "step": 5975 }, { "epoch": 0.21108309895844832, "grad_norm": 1.969779372215271, "learning_rate": 9.164623085896058e-06, "loss": 0.8437, "step": 5976 }, { "epoch": 0.21111842076215623, "grad_norm": 1.81584894657135, "learning_rate": 9.164306516118082e-06, "loss": 0.8756, "step": 5977 }, { "epoch": 0.21115374256586414, "grad_norm": 1.6730704307556152, "learning_rate": 9.163989891838448e-06, "loss": 0.8511, "step": 5978 }, { "epoch": 0.21118906436957202, "grad_norm": 1.7799289226531982, "learning_rate": 9.163673213061293e-06, "loss": 0.8307, "step": 5979 }, { "epoch": 0.21122438617327993, "grad_norm": 1.7790738344192505, "learning_rate": 9.163356479790768e-06, "loss": 0.8857, "step": 5980 }, { "epoch": 0.21125970797698784, "grad_norm": 1.8786379098892212, "learning_rate": 9.163039692031016e-06, "loss": 0.8586, "step": 5981 }, { "epoch": 0.21129502978069575, "grad_norm": 1.7726023197174072, "learning_rate": 9.162722849786184e-06, "loss": 0.835, "step": 5982 }, { "epoch": 0.21133035158440366, "grad_norm": 1.7336769104003906, "learning_rate": 9.162405953060416e-06, "loss": 0.8969, "step": 5983 }, { "epoch": 0.21136567338811157, "grad_norm": 1.7687979936599731, "learning_rate": 9.162089001857862e-06, "loss": 0.8467, "step": 5984 }, { "epoch": 0.21140099519181946, "grad_norm": 2.0941665172576904, "learning_rate": 9.16177199618267e-06, "loss": 0.8538, "step": 5985 }, { "epoch": 0.21143631699552737, "grad_norm": 1.7314132452011108, "learning_rate": 9.161454936038988e-06, "loss": 0.836, "step": 5986 }, { "epoch": 0.21147163879923528, "grad_norm": 1.9314656257629395, "learning_rate": 9.161137821430965e-06, "loss": 0.8638, "step": 5987 }, { "epoch": 0.2115069606029432, "grad_norm": 1.668806552886963, "learning_rate": 9.160820652362753e-06, "loss": 0.8462, "step": 5988 }, { "epoch": 0.2115422824066511, "grad_norm": 1.7247657775878906, "learning_rate": 9.160503428838503e-06, "loss": 0.8526, "step": 5989 }, { "epoch": 0.211577604210359, "grad_norm": 1.8742965459823608, "learning_rate": 9.160186150862366e-06, "loss": 0.8362, "step": 5990 }, { "epoch": 0.21161292601406692, "grad_norm": 1.7112290859222412, "learning_rate": 9.159868818438495e-06, "loss": 0.8368, "step": 5991 }, { "epoch": 0.2116482478177748, "grad_norm": 1.7383615970611572, "learning_rate": 9.159551431571043e-06, "loss": 0.8286, "step": 5992 }, { "epoch": 0.21168356962148271, "grad_norm": 1.7975744009017944, "learning_rate": 9.159233990264163e-06, "loss": 0.8751, "step": 5993 }, { "epoch": 0.21171889142519063, "grad_norm": 1.899192452430725, "learning_rate": 9.15891649452201e-06, "loss": 0.8597, "step": 5994 }, { "epoch": 0.21175421322889854, "grad_norm": 1.7522691488265991, "learning_rate": 9.15859894434874e-06, "loss": 0.8361, "step": 5995 }, { "epoch": 0.21178953503260645, "grad_norm": 1.8182591199874878, "learning_rate": 9.15828133974851e-06, "loss": 0.848, "step": 5996 }, { "epoch": 0.21182485683631436, "grad_norm": 1.9749841690063477, "learning_rate": 9.157963680725475e-06, "loss": 0.8559, "step": 5997 }, { "epoch": 0.21186017864002224, "grad_norm": 1.7167317867279053, "learning_rate": 9.157645967283792e-06, "loss": 0.8847, "step": 5998 }, { "epoch": 0.21189550044373015, "grad_norm": 1.7022520303726196, "learning_rate": 9.15732819942762e-06, "loss": 0.8253, "step": 5999 }, { "epoch": 0.21193082224743806, "grad_norm": 1.9075425863265991, "learning_rate": 9.15701037716112e-06, "loss": 0.8617, "step": 6000 }, { "epoch": 0.21196614405114597, "grad_norm": 1.874847173690796, "learning_rate": 9.156692500488448e-06, "loss": 0.8629, "step": 6001 }, { "epoch": 0.21200146585485388, "grad_norm": 1.9590917825698853, "learning_rate": 9.156374569413767e-06, "loss": 0.8651, "step": 6002 }, { "epoch": 0.2120367876585618, "grad_norm": 1.6651060581207275, "learning_rate": 9.156056583941236e-06, "loss": 0.8322, "step": 6003 }, { "epoch": 0.2120721094622697, "grad_norm": 1.6878998279571533, "learning_rate": 9.155738544075017e-06, "loss": 0.8111, "step": 6004 }, { "epoch": 0.2121074312659776, "grad_norm": 1.651735544204712, "learning_rate": 9.155420449819274e-06, "loss": 0.8366, "step": 6005 }, { "epoch": 0.2121427530696855, "grad_norm": 2.362987756729126, "learning_rate": 9.155102301178169e-06, "loss": 0.864, "step": 6006 }, { "epoch": 0.2121780748733934, "grad_norm": 1.7157857418060303, "learning_rate": 9.154784098155867e-06, "loss": 0.8174, "step": 6007 }, { "epoch": 0.21221339667710132, "grad_norm": 1.7143021821975708, "learning_rate": 9.15446584075653e-06, "loss": 0.8431, "step": 6008 }, { "epoch": 0.21224871848080923, "grad_norm": 1.7087827920913696, "learning_rate": 9.154147528984327e-06, "loss": 0.8514, "step": 6009 }, { "epoch": 0.21228404028451714, "grad_norm": 1.6705282926559448, "learning_rate": 9.153829162843422e-06, "loss": 0.8368, "step": 6010 }, { "epoch": 0.21231936208822502, "grad_norm": 1.6668949127197266, "learning_rate": 9.153510742337981e-06, "loss": 0.8546, "step": 6011 }, { "epoch": 0.21235468389193293, "grad_norm": 1.6585373878479004, "learning_rate": 9.153192267472173e-06, "loss": 0.8238, "step": 6012 }, { "epoch": 0.21239000569564084, "grad_norm": 1.6806765794754028, "learning_rate": 9.152873738250164e-06, "loss": 0.83, "step": 6013 }, { "epoch": 0.21242532749934875, "grad_norm": 1.6142609119415283, "learning_rate": 9.152555154676125e-06, "loss": 0.8615, "step": 6014 }, { "epoch": 0.21246064930305666, "grad_norm": 1.704506278038025, "learning_rate": 9.152236516754225e-06, "loss": 0.8687, "step": 6015 }, { "epoch": 0.21249597110676458, "grad_norm": 1.5380849838256836, "learning_rate": 9.151917824488631e-06, "loss": 0.8776, "step": 6016 }, { "epoch": 0.21253129291047249, "grad_norm": 1.1063232421875, "learning_rate": 9.151599077883522e-06, "loss": 0.5897, "step": 6017 }, { "epoch": 0.21256661471418037, "grad_norm": 1.7120651006698608, "learning_rate": 9.15128027694306e-06, "loss": 0.8782, "step": 6018 }, { "epoch": 0.21260193651788828, "grad_norm": 1.622797966003418, "learning_rate": 9.150961421671427e-06, "loss": 0.8599, "step": 6019 }, { "epoch": 0.2126372583215962, "grad_norm": 1.7465883493423462, "learning_rate": 9.150642512072787e-06, "loss": 0.8594, "step": 6020 }, { "epoch": 0.2126725801253041, "grad_norm": 1.7256592512130737, "learning_rate": 9.150323548151318e-06, "loss": 0.8496, "step": 6021 }, { "epoch": 0.212707901929012, "grad_norm": 1.6440154314041138, "learning_rate": 9.150004529911198e-06, "loss": 0.8529, "step": 6022 }, { "epoch": 0.21274322373271992, "grad_norm": 1.7807952165603638, "learning_rate": 9.149685457356596e-06, "loss": 0.8221, "step": 6023 }, { "epoch": 0.2127785455364278, "grad_norm": 1.8291914463043213, "learning_rate": 9.149366330491693e-06, "loss": 0.831, "step": 6024 }, { "epoch": 0.21281386734013572, "grad_norm": 1.670656681060791, "learning_rate": 9.149047149320662e-06, "loss": 0.8278, "step": 6025 }, { "epoch": 0.21284918914384363, "grad_norm": 1.901637315750122, "learning_rate": 9.148727913847682e-06, "loss": 0.9026, "step": 6026 }, { "epoch": 0.21288451094755154, "grad_norm": 1.749842882156372, "learning_rate": 9.14840862407693e-06, "loss": 0.8597, "step": 6027 }, { "epoch": 0.21291983275125945, "grad_norm": 1.6432433128356934, "learning_rate": 9.148089280012587e-06, "loss": 0.8374, "step": 6028 }, { "epoch": 0.21295515455496736, "grad_norm": 1.6701732873916626, "learning_rate": 9.14776988165883e-06, "loss": 0.8329, "step": 6029 }, { "epoch": 0.21299047635867527, "grad_norm": 3.1509249210357666, "learning_rate": 9.147450429019841e-06, "loss": 0.8336, "step": 6030 }, { "epoch": 0.21302579816238315, "grad_norm": 2.092789649963379, "learning_rate": 9.147130922099804e-06, "loss": 0.8724, "step": 6031 }, { "epoch": 0.21306111996609106, "grad_norm": 1.7293822765350342, "learning_rate": 9.146811360902892e-06, "loss": 0.8453, "step": 6032 }, { "epoch": 0.21309644176979897, "grad_norm": 1.617277979850769, "learning_rate": 9.146491745433295e-06, "loss": 0.8271, "step": 6033 }, { "epoch": 0.21313176357350688, "grad_norm": 1.7241264581680298, "learning_rate": 9.146172075695192e-06, "loss": 0.8627, "step": 6034 }, { "epoch": 0.2131670853772148, "grad_norm": 1.5897903442382812, "learning_rate": 9.14585235169277e-06, "loss": 0.8716, "step": 6035 }, { "epoch": 0.2132024071809227, "grad_norm": 1.6798362731933594, "learning_rate": 9.145532573430209e-06, "loss": 0.8196, "step": 6036 }, { "epoch": 0.2132377289846306, "grad_norm": 1.764794945716858, "learning_rate": 9.145212740911699e-06, "loss": 0.8817, "step": 6037 }, { "epoch": 0.2132730507883385, "grad_norm": 1.755545735359192, "learning_rate": 9.144892854141425e-06, "loss": 0.8482, "step": 6038 }, { "epoch": 0.2133083725920464, "grad_norm": 1.7620292901992798, "learning_rate": 9.144572913123571e-06, "loss": 0.8575, "step": 6039 }, { "epoch": 0.21334369439575432, "grad_norm": 1.4636412858963013, "learning_rate": 9.144252917862327e-06, "loss": 0.8369, "step": 6040 }, { "epoch": 0.21337901619946223, "grad_norm": 1.674180030822754, "learning_rate": 9.143932868361877e-06, "loss": 0.8302, "step": 6041 }, { "epoch": 0.21341433800317014, "grad_norm": 1.6982759237289429, "learning_rate": 9.143612764626416e-06, "loss": 0.8518, "step": 6042 }, { "epoch": 0.21344965980687805, "grad_norm": 1.8123266696929932, "learning_rate": 9.143292606660127e-06, "loss": 0.8869, "step": 6043 }, { "epoch": 0.21348498161058593, "grad_norm": 1.4708784818649292, "learning_rate": 9.142972394467205e-06, "loss": 0.8169, "step": 6044 }, { "epoch": 0.21352030341429384, "grad_norm": 1.6367546319961548, "learning_rate": 9.142652128051839e-06, "loss": 0.8249, "step": 6045 }, { "epoch": 0.21355562521800175, "grad_norm": 1.7206648588180542, "learning_rate": 9.14233180741822e-06, "loss": 0.8855, "step": 6046 }, { "epoch": 0.21359094702170967, "grad_norm": 1.6319330930709839, "learning_rate": 9.142011432570541e-06, "loss": 0.8291, "step": 6047 }, { "epoch": 0.21362626882541758, "grad_norm": 1.7223402261734009, "learning_rate": 9.141691003512996e-06, "loss": 0.8515, "step": 6048 }, { "epoch": 0.2136615906291255, "grad_norm": 1.4780349731445312, "learning_rate": 9.141370520249777e-06, "loss": 0.8458, "step": 6049 }, { "epoch": 0.21369691243283337, "grad_norm": 1.8564010858535767, "learning_rate": 9.14104998278508e-06, "loss": 0.8338, "step": 6050 }, { "epoch": 0.21373223423654128, "grad_norm": 3.3161394596099854, "learning_rate": 9.140729391123098e-06, "loss": 0.8557, "step": 6051 }, { "epoch": 0.2137675560402492, "grad_norm": 1.7629426717758179, "learning_rate": 9.140408745268027e-06, "loss": 0.8714, "step": 6052 }, { "epoch": 0.2138028778439571, "grad_norm": 1.6759458780288696, "learning_rate": 9.140088045224068e-06, "loss": 0.8302, "step": 6053 }, { "epoch": 0.213838199647665, "grad_norm": 5.417496681213379, "learning_rate": 9.139767290995411e-06, "loss": 0.8101, "step": 6054 }, { "epoch": 0.21387352145137292, "grad_norm": 2.0554914474487305, "learning_rate": 9.139446482586261e-06, "loss": 0.8479, "step": 6055 }, { "epoch": 0.21390884325508083, "grad_norm": 1.7760218381881714, "learning_rate": 9.139125620000813e-06, "loss": 0.8864, "step": 6056 }, { "epoch": 0.21394416505878872, "grad_norm": 1.6869038343429565, "learning_rate": 9.138804703243267e-06, "loss": 0.8162, "step": 6057 }, { "epoch": 0.21397948686249663, "grad_norm": 1.7709152698516846, "learning_rate": 9.138483732317823e-06, "loss": 0.852, "step": 6058 }, { "epoch": 0.21401480866620454, "grad_norm": 1.6791863441467285, "learning_rate": 9.13816270722868e-06, "loss": 0.838, "step": 6059 }, { "epoch": 0.21405013046991245, "grad_norm": 1.6903058290481567, "learning_rate": 9.137841627980044e-06, "loss": 0.8613, "step": 6060 }, { "epoch": 0.21408545227362036, "grad_norm": 1.749637484550476, "learning_rate": 9.137520494576114e-06, "loss": 0.8438, "step": 6061 }, { "epoch": 0.21412077407732827, "grad_norm": 1.6627534627914429, "learning_rate": 9.137199307021092e-06, "loss": 0.8325, "step": 6062 }, { "epoch": 0.21415609588103615, "grad_norm": 1.7014199495315552, "learning_rate": 9.136878065319183e-06, "loss": 0.8424, "step": 6063 }, { "epoch": 0.21419141768474406, "grad_norm": 1.673063039779663, "learning_rate": 9.136556769474594e-06, "loss": 0.8575, "step": 6064 }, { "epoch": 0.21422673948845197, "grad_norm": 1.8347160816192627, "learning_rate": 9.136235419491525e-06, "loss": 0.8492, "step": 6065 }, { "epoch": 0.21426206129215988, "grad_norm": 1.800008773803711, "learning_rate": 9.135914015374187e-06, "loss": 0.8576, "step": 6066 }, { "epoch": 0.2142973830958678, "grad_norm": 1.684461236000061, "learning_rate": 9.135592557126782e-06, "loss": 0.8292, "step": 6067 }, { "epoch": 0.2143327048995757, "grad_norm": 1.8353979587554932, "learning_rate": 9.13527104475352e-06, "loss": 0.8516, "step": 6068 }, { "epoch": 0.21436802670328362, "grad_norm": 1.6567679643630981, "learning_rate": 9.134949478258607e-06, "loss": 0.8394, "step": 6069 }, { "epoch": 0.2144033485069915, "grad_norm": 1.9379388093948364, "learning_rate": 9.134627857646253e-06, "loss": 0.8512, "step": 6070 }, { "epoch": 0.2144386703106994, "grad_norm": 1.7397499084472656, "learning_rate": 9.134306182920668e-06, "loss": 0.8357, "step": 6071 }, { "epoch": 0.21447399211440732, "grad_norm": 2.2227351665496826, "learning_rate": 9.133984454086058e-06, "loss": 0.8401, "step": 6072 }, { "epoch": 0.21450931391811523, "grad_norm": 1.7896455526351929, "learning_rate": 9.133662671146638e-06, "loss": 0.8377, "step": 6073 }, { "epoch": 0.21454463572182314, "grad_norm": 1.6442251205444336, "learning_rate": 9.133340834106619e-06, "loss": 0.8488, "step": 6074 }, { "epoch": 0.21457995752553105, "grad_norm": 1.7891885042190552, "learning_rate": 9.133018942970211e-06, "loss": 0.8557, "step": 6075 }, { "epoch": 0.21461527932923893, "grad_norm": 1.5080883502960205, "learning_rate": 9.132696997741627e-06, "loss": 0.8057, "step": 6076 }, { "epoch": 0.21465060113294684, "grad_norm": 1.70392644405365, "learning_rate": 9.132374998425084e-06, "loss": 0.8648, "step": 6077 }, { "epoch": 0.21468592293665476, "grad_norm": 2.13134503364563, "learning_rate": 9.132052945024792e-06, "loss": 0.8779, "step": 6078 }, { "epoch": 0.21472124474036267, "grad_norm": 1.6383603811264038, "learning_rate": 9.13173083754497e-06, "loss": 0.8622, "step": 6079 }, { "epoch": 0.21475656654407058, "grad_norm": 1.6122698783874512, "learning_rate": 9.13140867598983e-06, "loss": 0.9035, "step": 6080 }, { "epoch": 0.2147918883477785, "grad_norm": 1.8638579845428467, "learning_rate": 9.131086460363589e-06, "loss": 0.8739, "step": 6081 }, { "epoch": 0.2148272101514864, "grad_norm": 1.6108393669128418, "learning_rate": 9.130764190670464e-06, "loss": 0.8577, "step": 6082 }, { "epoch": 0.21486253195519428, "grad_norm": 1.652113437652588, "learning_rate": 9.130441866914676e-06, "loss": 0.8677, "step": 6083 }, { "epoch": 0.2148978537589022, "grad_norm": 1.7574849128723145, "learning_rate": 9.130119489100442e-06, "loss": 0.8332, "step": 6084 }, { "epoch": 0.2149331755626101, "grad_norm": 1.7089214324951172, "learning_rate": 9.129797057231979e-06, "loss": 0.8415, "step": 6085 }, { "epoch": 0.214968497366318, "grad_norm": 1.621690273284912, "learning_rate": 9.12947457131351e-06, "loss": 0.8435, "step": 6086 }, { "epoch": 0.21500381917002592, "grad_norm": 1.7500194311141968, "learning_rate": 9.129152031349251e-06, "loss": 0.852, "step": 6087 }, { "epoch": 0.21503914097373383, "grad_norm": 1.7663201093673706, "learning_rate": 9.12882943734343e-06, "loss": 0.8429, "step": 6088 }, { "epoch": 0.21507446277744174, "grad_norm": 1.802911639213562, "learning_rate": 9.128506789300263e-06, "loss": 0.8656, "step": 6089 }, { "epoch": 0.21510978458114963, "grad_norm": 1.5936206579208374, "learning_rate": 9.128184087223976e-06, "loss": 0.8594, "step": 6090 }, { "epoch": 0.21514510638485754, "grad_norm": 1.625920057296753, "learning_rate": 9.127861331118792e-06, "loss": 0.8358, "step": 6091 }, { "epoch": 0.21518042818856545, "grad_norm": 1.7904542684555054, "learning_rate": 9.127538520988933e-06, "loss": 0.8425, "step": 6092 }, { "epoch": 0.21521574999227336, "grad_norm": 1.6306208372116089, "learning_rate": 9.127215656838628e-06, "loss": 0.8209, "step": 6093 }, { "epoch": 0.21525107179598127, "grad_norm": 1.5809998512268066, "learning_rate": 9.126892738672099e-06, "loss": 0.8262, "step": 6094 }, { "epoch": 0.21528639359968918, "grad_norm": 1.8031798601150513, "learning_rate": 9.126569766493575e-06, "loss": 0.8757, "step": 6095 }, { "epoch": 0.21532171540339706, "grad_norm": 1.6863017082214355, "learning_rate": 9.12624674030728e-06, "loss": 0.8648, "step": 6096 }, { "epoch": 0.21535703720710497, "grad_norm": 1.6656131744384766, "learning_rate": 9.125923660117445e-06, "loss": 0.8645, "step": 6097 }, { "epoch": 0.21539235901081288, "grad_norm": 1.6014289855957031, "learning_rate": 9.125600525928296e-06, "loss": 0.8475, "step": 6098 }, { "epoch": 0.2154276808145208, "grad_norm": 1.6673526763916016, "learning_rate": 9.125277337744062e-06, "loss": 0.847, "step": 6099 }, { "epoch": 0.2154630026182287, "grad_norm": 1.6230974197387695, "learning_rate": 9.124954095568975e-06, "loss": 0.8455, "step": 6100 }, { "epoch": 0.21549832442193662, "grad_norm": 1.5701873302459717, "learning_rate": 9.124630799407262e-06, "loss": 0.818, "step": 6101 }, { "epoch": 0.21553364622564453, "grad_norm": 1.6164844036102295, "learning_rate": 9.124307449263159e-06, "loss": 0.847, "step": 6102 }, { "epoch": 0.2155689680293524, "grad_norm": 1.6622320413589478, "learning_rate": 9.123984045140895e-06, "loss": 0.8565, "step": 6103 }, { "epoch": 0.21560428983306032, "grad_norm": 1.7395669221878052, "learning_rate": 9.123660587044701e-06, "loss": 0.8668, "step": 6104 }, { "epoch": 0.21563961163676823, "grad_norm": 1.7571669816970825, "learning_rate": 9.123337074978813e-06, "loss": 0.8587, "step": 6105 }, { "epoch": 0.21567493344047614, "grad_norm": 1.7009221315383911, "learning_rate": 9.123013508947466e-06, "loss": 0.8432, "step": 6106 }, { "epoch": 0.21571025524418405, "grad_norm": 1.689893126487732, "learning_rate": 9.122689888954891e-06, "loss": 0.8718, "step": 6107 }, { "epoch": 0.21574557704789196, "grad_norm": 1.6919126510620117, "learning_rate": 9.122366215005329e-06, "loss": 0.8507, "step": 6108 }, { "epoch": 0.21578089885159984, "grad_norm": 1.7978938817977905, "learning_rate": 9.122042487103012e-06, "loss": 0.8549, "step": 6109 }, { "epoch": 0.21581622065530776, "grad_norm": 1.5647801160812378, "learning_rate": 9.121718705252175e-06, "loss": 0.8547, "step": 6110 }, { "epoch": 0.21585154245901567, "grad_norm": 1.6482423543930054, "learning_rate": 9.121394869457061e-06, "loss": 0.8362, "step": 6111 }, { "epoch": 0.21588686426272358, "grad_norm": 1.5916314125061035, "learning_rate": 9.121070979721905e-06, "loss": 0.8524, "step": 6112 }, { "epoch": 0.2159221860664315, "grad_norm": 1.762359857559204, "learning_rate": 9.120747036050946e-06, "loss": 0.8569, "step": 6113 }, { "epoch": 0.2159575078701394, "grad_norm": 1.6775659322738647, "learning_rate": 9.120423038448424e-06, "loss": 0.859, "step": 6114 }, { "epoch": 0.2159928296738473, "grad_norm": 1.8403490781784058, "learning_rate": 9.120098986918582e-06, "loss": 0.8502, "step": 6115 }, { "epoch": 0.2160281514775552, "grad_norm": 1.5727341175079346, "learning_rate": 9.119774881465657e-06, "loss": 0.8442, "step": 6116 }, { "epoch": 0.2160634732812631, "grad_norm": 1.9573837518692017, "learning_rate": 9.119450722093895e-06, "loss": 0.873, "step": 6117 }, { "epoch": 0.216098795084971, "grad_norm": 1.5512313842773438, "learning_rate": 9.119126508807535e-06, "loss": 0.8224, "step": 6118 }, { "epoch": 0.21613411688867892, "grad_norm": 1.7940235137939453, "learning_rate": 9.11880224161082e-06, "loss": 0.8705, "step": 6119 }, { "epoch": 0.21616943869238683, "grad_norm": 1.7251713275909424, "learning_rate": 9.118477920507998e-06, "loss": 0.8699, "step": 6120 }, { "epoch": 0.21620476049609474, "grad_norm": 1.748329520225525, "learning_rate": 9.11815354550331e-06, "loss": 0.8379, "step": 6121 }, { "epoch": 0.21624008229980263, "grad_norm": 1.9776332378387451, "learning_rate": 9.117829116601002e-06, "loss": 0.8816, "step": 6122 }, { "epoch": 0.21627540410351054, "grad_norm": 1.7383365631103516, "learning_rate": 9.117504633805321e-06, "loss": 0.886, "step": 6123 }, { "epoch": 0.21631072590721845, "grad_norm": 1.6559946537017822, "learning_rate": 9.117180097120515e-06, "loss": 0.8398, "step": 6124 }, { "epoch": 0.21634604771092636, "grad_norm": 1.683182954788208, "learning_rate": 9.116855506550828e-06, "loss": 0.8314, "step": 6125 }, { "epoch": 0.21638136951463427, "grad_norm": 1.6700518131256104, "learning_rate": 9.11653086210051e-06, "loss": 0.8525, "step": 6126 }, { "epoch": 0.21641669131834218, "grad_norm": 1.623161792755127, "learning_rate": 9.11620616377381e-06, "loss": 0.8613, "step": 6127 }, { "epoch": 0.2164520131220501, "grad_norm": 1.7109463214874268, "learning_rate": 9.115881411574978e-06, "loss": 0.8527, "step": 6128 }, { "epoch": 0.21648733492575797, "grad_norm": 1.7607372999191284, "learning_rate": 9.115556605508264e-06, "loss": 0.9098, "step": 6129 }, { "epoch": 0.21652265672946588, "grad_norm": 1.6449244022369385, "learning_rate": 9.11523174557792e-06, "loss": 0.8613, "step": 6130 }, { "epoch": 0.2165579785331738, "grad_norm": 1.7552212476730347, "learning_rate": 9.114906831788193e-06, "loss": 0.8887, "step": 6131 }, { "epoch": 0.2165933003368817, "grad_norm": 1.5097343921661377, "learning_rate": 9.114581864143342e-06, "loss": 0.8309, "step": 6132 }, { "epoch": 0.21662862214058962, "grad_norm": 3.3524601459503174, "learning_rate": 9.114256842647615e-06, "loss": 0.8574, "step": 6133 }, { "epoch": 0.21666394394429753, "grad_norm": 1.8665999174118042, "learning_rate": 9.113931767305269e-06, "loss": 0.818, "step": 6134 }, { "epoch": 0.2166992657480054, "grad_norm": 1.6617259979248047, "learning_rate": 9.113606638120557e-06, "loss": 0.8465, "step": 6135 }, { "epoch": 0.21673458755171332, "grad_norm": 1.8113861083984375, "learning_rate": 9.113281455097734e-06, "loss": 0.8069, "step": 6136 }, { "epoch": 0.21676990935542123, "grad_norm": 1.6246334314346313, "learning_rate": 9.112956218241058e-06, "loss": 0.8396, "step": 6137 }, { "epoch": 0.21680523115912914, "grad_norm": 1.7440346479415894, "learning_rate": 9.112630927554783e-06, "loss": 0.861, "step": 6138 }, { "epoch": 0.21684055296283705, "grad_norm": 1.9084813594818115, "learning_rate": 9.112305583043169e-06, "loss": 0.8389, "step": 6139 }, { "epoch": 0.21687587476654496, "grad_norm": 1.8590319156646729, "learning_rate": 9.111980184710471e-06, "loss": 0.8606, "step": 6140 }, { "epoch": 0.21691119657025287, "grad_norm": 1.7331594228744507, "learning_rate": 9.111654732560948e-06, "loss": 0.8869, "step": 6141 }, { "epoch": 0.21694651837396076, "grad_norm": 1.644003987312317, "learning_rate": 9.111329226598864e-06, "loss": 0.8705, "step": 6142 }, { "epoch": 0.21698184017766867, "grad_norm": 1.751165747642517, "learning_rate": 9.111003666828473e-06, "loss": 0.8196, "step": 6143 }, { "epoch": 0.21701716198137658, "grad_norm": 1.845392107963562, "learning_rate": 9.110678053254041e-06, "loss": 0.869, "step": 6144 }, { "epoch": 0.2170524837850845, "grad_norm": 1.6495386362075806, "learning_rate": 9.110352385879827e-06, "loss": 0.81, "step": 6145 }, { "epoch": 0.2170878055887924, "grad_norm": 1.606948971748352, "learning_rate": 9.110026664710092e-06, "loss": 0.8559, "step": 6146 }, { "epoch": 0.2171231273925003, "grad_norm": 1.774660587310791, "learning_rate": 9.109700889749102e-06, "loss": 0.8616, "step": 6147 }, { "epoch": 0.2171584491962082, "grad_norm": 1.8201391696929932, "learning_rate": 9.10937506100112e-06, "loss": 0.8676, "step": 6148 }, { "epoch": 0.2171937709999161, "grad_norm": 1.5231058597564697, "learning_rate": 9.109049178470409e-06, "loss": 0.8433, "step": 6149 }, { "epoch": 0.217229092803624, "grad_norm": 1.7557603120803833, "learning_rate": 9.108723242161235e-06, "loss": 0.9241, "step": 6150 }, { "epoch": 0.21726441460733192, "grad_norm": 1.84048593044281, "learning_rate": 9.108397252077863e-06, "loss": 0.8534, "step": 6151 }, { "epoch": 0.21729973641103983, "grad_norm": 1.691206455230713, "learning_rate": 9.108071208224558e-06, "loss": 0.8097, "step": 6152 }, { "epoch": 0.21733505821474774, "grad_norm": 1.6945003271102905, "learning_rate": 9.107745110605592e-06, "loss": 0.8265, "step": 6153 }, { "epoch": 0.21737038001845566, "grad_norm": 1.657296061515808, "learning_rate": 9.10741895922523e-06, "loss": 0.8449, "step": 6154 }, { "epoch": 0.21740570182216354, "grad_norm": 1.8892697095870972, "learning_rate": 9.10709275408774e-06, "loss": 0.8714, "step": 6155 }, { "epoch": 0.21744102362587145, "grad_norm": 1.8870829343795776, "learning_rate": 9.106766495197393e-06, "loss": 0.8551, "step": 6156 }, { "epoch": 0.21747634542957936, "grad_norm": 1.8790119886398315, "learning_rate": 9.106440182558457e-06, "loss": 0.8567, "step": 6157 }, { "epoch": 0.21751166723328727, "grad_norm": 1.6402887105941772, "learning_rate": 9.106113816175206e-06, "loss": 0.8469, "step": 6158 }, { "epoch": 0.21754698903699518, "grad_norm": 1.7850068807601929, "learning_rate": 9.105787396051906e-06, "loss": 0.8463, "step": 6159 }, { "epoch": 0.2175823108407031, "grad_norm": 1.8387693166732788, "learning_rate": 9.105460922192835e-06, "loss": 0.8734, "step": 6160 }, { "epoch": 0.21761763264441097, "grad_norm": 1.638890266418457, "learning_rate": 9.105134394602262e-06, "loss": 0.8583, "step": 6161 }, { "epoch": 0.21765295444811888, "grad_norm": 1.5983511209487915, "learning_rate": 9.104807813284462e-06, "loss": 0.8624, "step": 6162 }, { "epoch": 0.2176882762518268, "grad_norm": 1.6523714065551758, "learning_rate": 9.10448117824371e-06, "loss": 0.8586, "step": 6163 }, { "epoch": 0.2177235980555347, "grad_norm": 1.6820709705352783, "learning_rate": 9.104154489484278e-06, "loss": 0.8346, "step": 6164 }, { "epoch": 0.21775891985924262, "grad_norm": 1.5193560123443604, "learning_rate": 9.103827747010443e-06, "loss": 0.8239, "step": 6165 }, { "epoch": 0.21779424166295053, "grad_norm": 2.3728628158569336, "learning_rate": 9.103500950826484e-06, "loss": 0.8336, "step": 6166 }, { "epoch": 0.21782956346665844, "grad_norm": 1.7420376539230347, "learning_rate": 9.103174100936676e-06, "loss": 0.8499, "step": 6167 }, { "epoch": 0.21786488527036632, "grad_norm": 1.6909431219100952, "learning_rate": 9.102847197345294e-06, "loss": 0.8562, "step": 6168 }, { "epoch": 0.21790020707407423, "grad_norm": 1.6622438430786133, "learning_rate": 9.102520240056623e-06, "loss": 0.8015, "step": 6169 }, { "epoch": 0.21793552887778214, "grad_norm": 1.73637056350708, "learning_rate": 9.102193229074934e-06, "loss": 0.8301, "step": 6170 }, { "epoch": 0.21797085068149005, "grad_norm": 1.8569438457489014, "learning_rate": 9.101866164404513e-06, "loss": 0.8699, "step": 6171 }, { "epoch": 0.21800617248519796, "grad_norm": 1.7891769409179688, "learning_rate": 9.101539046049642e-06, "loss": 0.8452, "step": 6172 }, { "epoch": 0.21804149428890587, "grad_norm": 1.7839272022247314, "learning_rate": 9.101211874014594e-06, "loss": 0.8358, "step": 6173 }, { "epoch": 0.21807681609261376, "grad_norm": 1.8156508207321167, "learning_rate": 9.100884648303659e-06, "loss": 0.8896, "step": 6174 }, { "epoch": 0.21811213789632167, "grad_norm": 1.836055874824524, "learning_rate": 9.100557368921115e-06, "loss": 0.8401, "step": 6175 }, { "epoch": 0.21814745970002958, "grad_norm": 1.8116002082824707, "learning_rate": 9.100230035871247e-06, "loss": 0.8638, "step": 6176 }, { "epoch": 0.2181827815037375, "grad_norm": 1.7206311225891113, "learning_rate": 9.099902649158339e-06, "loss": 0.8486, "step": 6177 }, { "epoch": 0.2182181033074454, "grad_norm": 1.8973151445388794, "learning_rate": 9.099575208786678e-06, "loss": 0.8506, "step": 6178 }, { "epoch": 0.2182534251111533, "grad_norm": 1.929527997970581, "learning_rate": 9.099247714760546e-06, "loss": 0.878, "step": 6179 }, { "epoch": 0.21828874691486122, "grad_norm": 2.1610214710235596, "learning_rate": 9.098920167084228e-06, "loss": 0.8458, "step": 6180 }, { "epoch": 0.2183240687185691, "grad_norm": 1.7897961139678955, "learning_rate": 9.098592565762016e-06, "loss": 0.8076, "step": 6181 }, { "epoch": 0.218359390522277, "grad_norm": 1.803635835647583, "learning_rate": 9.098264910798196e-06, "loss": 0.8707, "step": 6182 }, { "epoch": 0.21839471232598492, "grad_norm": 1.5888532400131226, "learning_rate": 9.097937202197054e-06, "loss": 0.8184, "step": 6183 }, { "epoch": 0.21843003412969283, "grad_norm": 1.6909873485565186, "learning_rate": 9.097609439962879e-06, "loss": 0.8336, "step": 6184 }, { "epoch": 0.21846535593340075, "grad_norm": 1.6466259956359863, "learning_rate": 9.097281624099962e-06, "loss": 0.8402, "step": 6185 }, { "epoch": 0.21850067773710866, "grad_norm": 1.628839373588562, "learning_rate": 9.096953754612595e-06, "loss": 0.831, "step": 6186 }, { "epoch": 0.21853599954081654, "grad_norm": 1.660760521888733, "learning_rate": 9.096625831505067e-06, "loss": 0.8154, "step": 6187 }, { "epoch": 0.21857132134452445, "grad_norm": 1.6352797746658325, "learning_rate": 9.096297854781669e-06, "loss": 0.8477, "step": 6188 }, { "epoch": 0.21860664314823236, "grad_norm": 1.790532112121582, "learning_rate": 9.095969824446696e-06, "loss": 0.8824, "step": 6189 }, { "epoch": 0.21864196495194027, "grad_norm": 1.6867103576660156, "learning_rate": 9.09564174050444e-06, "loss": 0.8447, "step": 6190 }, { "epoch": 0.21867728675564818, "grad_norm": 1.620751976966858, "learning_rate": 9.095313602959194e-06, "loss": 0.8432, "step": 6191 }, { "epoch": 0.2187126085593561, "grad_norm": 1.5444766283035278, "learning_rate": 9.094985411815253e-06, "loss": 0.8561, "step": 6192 }, { "epoch": 0.218747930363064, "grad_norm": 1.8191919326782227, "learning_rate": 9.094657167076914e-06, "loss": 0.8315, "step": 6193 }, { "epoch": 0.21878325216677189, "grad_norm": 1.7224944829940796, "learning_rate": 9.09432886874847e-06, "loss": 0.8711, "step": 6194 }, { "epoch": 0.2188185739704798, "grad_norm": 1.225143551826477, "learning_rate": 9.09400051683422e-06, "loss": 0.6232, "step": 6195 }, { "epoch": 0.2188538957741877, "grad_norm": 1.7960083484649658, "learning_rate": 9.093672111338463e-06, "loss": 0.8492, "step": 6196 }, { "epoch": 0.21888921757789562, "grad_norm": 1.8355059623718262, "learning_rate": 9.09334365226549e-06, "loss": 0.8159, "step": 6197 }, { "epoch": 0.21892453938160353, "grad_norm": 1.6053513288497925, "learning_rate": 9.09301513961961e-06, "loss": 0.9047, "step": 6198 }, { "epoch": 0.21895986118531144, "grad_norm": 2.2729058265686035, "learning_rate": 9.092686573405117e-06, "loss": 0.9108, "step": 6199 }, { "epoch": 0.21899518298901932, "grad_norm": 1.722304344177246, "learning_rate": 9.092357953626308e-06, "loss": 0.8128, "step": 6200 }, { "epoch": 0.21903050479272723, "grad_norm": 1.716153860092163, "learning_rate": 9.09202928028749e-06, "loss": 0.8315, "step": 6201 }, { "epoch": 0.21906582659643514, "grad_norm": 1.6396344900131226, "learning_rate": 9.091700553392958e-06, "loss": 0.8244, "step": 6202 }, { "epoch": 0.21910114840014305, "grad_norm": 1.7022435665130615, "learning_rate": 9.091371772947024e-06, "loss": 0.834, "step": 6203 }, { "epoch": 0.21913647020385096, "grad_norm": 1.8167493343353271, "learning_rate": 9.091042938953982e-06, "loss": 0.804, "step": 6204 }, { "epoch": 0.21917179200755887, "grad_norm": 1.4878727197647095, "learning_rate": 9.09071405141814e-06, "loss": 0.8281, "step": 6205 }, { "epoch": 0.21920711381126678, "grad_norm": 1.7249897718429565, "learning_rate": 9.090385110343801e-06, "loss": 0.8627, "step": 6206 }, { "epoch": 0.21924243561497467, "grad_norm": 1.7263853549957275, "learning_rate": 9.090056115735271e-06, "loss": 0.8403, "step": 6207 }, { "epoch": 0.21927775741868258, "grad_norm": 2.117473602294922, "learning_rate": 9.089727067596855e-06, "loss": 0.8372, "step": 6208 }, { "epoch": 0.2193130792223905, "grad_norm": 1.5611892938613892, "learning_rate": 9.089397965932861e-06, "loss": 0.842, "step": 6209 }, { "epoch": 0.2193484010260984, "grad_norm": 1.7822818756103516, "learning_rate": 9.089068810747594e-06, "loss": 0.8241, "step": 6210 }, { "epoch": 0.2193837228298063, "grad_norm": 1.6648720502853394, "learning_rate": 9.088739602045362e-06, "loss": 0.8422, "step": 6211 }, { "epoch": 0.21941904463351422, "grad_norm": 1.5384771823883057, "learning_rate": 9.088410339830477e-06, "loss": 0.8412, "step": 6212 }, { "epoch": 0.2194543664372221, "grad_norm": 1.6970843076705933, "learning_rate": 9.088081024107245e-06, "loss": 0.8626, "step": 6213 }, { "epoch": 0.21948968824093001, "grad_norm": 1.7294574975967407, "learning_rate": 9.087751654879976e-06, "loss": 0.8547, "step": 6214 }, { "epoch": 0.21952501004463792, "grad_norm": 1.957714319229126, "learning_rate": 9.087422232152983e-06, "loss": 0.8926, "step": 6215 }, { "epoch": 0.21956033184834584, "grad_norm": 1.6864445209503174, "learning_rate": 9.087092755930577e-06, "loss": 0.7959, "step": 6216 }, { "epoch": 0.21959565365205375, "grad_norm": 1.5742757320404053, "learning_rate": 9.086763226217068e-06, "loss": 0.8232, "step": 6217 }, { "epoch": 0.21963097545576166, "grad_norm": 1.9301717281341553, "learning_rate": 9.08643364301677e-06, "loss": 0.8445, "step": 6218 }, { "epoch": 0.21966629725946957, "grad_norm": 1.6750705242156982, "learning_rate": 9.086104006333997e-06, "loss": 0.8411, "step": 6219 }, { "epoch": 0.21970161906317745, "grad_norm": 1.6116224527359009, "learning_rate": 9.085774316173063e-06, "loss": 0.839, "step": 6220 }, { "epoch": 0.21973694086688536, "grad_norm": 1.7078534364700317, "learning_rate": 9.085444572538282e-06, "loss": 0.8495, "step": 6221 }, { "epoch": 0.21977226267059327, "grad_norm": 1.6964086294174194, "learning_rate": 9.085114775433973e-06, "loss": 0.8643, "step": 6222 }, { "epoch": 0.21980758447430118, "grad_norm": 1.5610426664352417, "learning_rate": 9.084784924864448e-06, "loss": 0.8568, "step": 6223 }, { "epoch": 0.2198429062780091, "grad_norm": 1.6628506183624268, "learning_rate": 9.084455020834025e-06, "loss": 0.8457, "step": 6224 }, { "epoch": 0.219878228081717, "grad_norm": 1.8245762586593628, "learning_rate": 9.084125063347023e-06, "loss": 0.8381, "step": 6225 }, { "epoch": 0.21991354988542489, "grad_norm": 2.0405642986297607, "learning_rate": 9.083795052407759e-06, "loss": 0.8428, "step": 6226 }, { "epoch": 0.2199488716891328, "grad_norm": 1.700769066810608, "learning_rate": 9.083464988020553e-06, "loss": 0.8364, "step": 6227 }, { "epoch": 0.2199841934928407, "grad_norm": 1.79066801071167, "learning_rate": 9.083134870189728e-06, "loss": 0.8891, "step": 6228 }, { "epoch": 0.22001951529654862, "grad_norm": 1.8337173461914062, "learning_rate": 9.082804698919599e-06, "loss": 0.8411, "step": 6229 }, { "epoch": 0.22005483710025653, "grad_norm": 1.7072193622589111, "learning_rate": 9.08247447421449e-06, "loss": 0.8612, "step": 6230 }, { "epoch": 0.22009015890396444, "grad_norm": 1.7010939121246338, "learning_rate": 9.082144196078721e-06, "loss": 0.8571, "step": 6231 }, { "epoch": 0.22012548070767235, "grad_norm": 2.0309784412384033, "learning_rate": 9.081813864516618e-06, "loss": 0.8644, "step": 6232 }, { "epoch": 0.22016080251138023, "grad_norm": 1.7405645847320557, "learning_rate": 9.081483479532502e-06, "loss": 0.8581, "step": 6233 }, { "epoch": 0.22019612431508814, "grad_norm": 1.6523371934890747, "learning_rate": 9.081153041130697e-06, "loss": 0.8424, "step": 6234 }, { "epoch": 0.22023144611879605, "grad_norm": 1.7016968727111816, "learning_rate": 9.080822549315528e-06, "loss": 0.8522, "step": 6235 }, { "epoch": 0.22026676792250396, "grad_norm": 1.6554745435714722, "learning_rate": 9.080492004091322e-06, "loss": 0.8514, "step": 6236 }, { "epoch": 0.22030208972621187, "grad_norm": 2.394258499145508, "learning_rate": 9.080161405462402e-06, "loss": 0.8288, "step": 6237 }, { "epoch": 0.22033741152991979, "grad_norm": 1.6449350118637085, "learning_rate": 9.079830753433097e-06, "loss": 0.8277, "step": 6238 }, { "epoch": 0.22037273333362767, "grad_norm": 1.817635416984558, "learning_rate": 9.079500048007734e-06, "loss": 0.8244, "step": 6239 }, { "epoch": 0.22040805513733558, "grad_norm": 1.698758602142334, "learning_rate": 9.079169289190643e-06, "loss": 0.8386, "step": 6240 }, { "epoch": 0.2204433769410435, "grad_norm": 1.6072276830673218, "learning_rate": 9.07883847698615e-06, "loss": 0.8625, "step": 6241 }, { "epoch": 0.2204786987447514, "grad_norm": 1.7021009922027588, "learning_rate": 9.078507611398585e-06, "loss": 0.8553, "step": 6242 }, { "epoch": 0.2205140205484593, "grad_norm": 1.881776213645935, "learning_rate": 9.078176692432279e-06, "loss": 0.8453, "step": 6243 }, { "epoch": 0.22054934235216722, "grad_norm": 1.773197889328003, "learning_rate": 9.077845720091563e-06, "loss": 0.8942, "step": 6244 }, { "epoch": 0.22058466415587513, "grad_norm": 1.6661819219589233, "learning_rate": 9.07751469438077e-06, "loss": 0.8457, "step": 6245 }, { "epoch": 0.22061998595958301, "grad_norm": 1.7685562372207642, "learning_rate": 9.07718361530423e-06, "loss": 0.8547, "step": 6246 }, { "epoch": 0.22065530776329093, "grad_norm": 1.709150791168213, "learning_rate": 9.076852482866278e-06, "loss": 0.8485, "step": 6247 }, { "epoch": 0.22069062956699884, "grad_norm": 1.8580594062805176, "learning_rate": 9.076521297071247e-06, "loss": 0.8541, "step": 6248 }, { "epoch": 0.22072595137070675, "grad_norm": 1.9030057191848755, "learning_rate": 9.076190057923471e-06, "loss": 0.8653, "step": 6249 }, { "epoch": 0.22076127317441466, "grad_norm": 1.8603644371032715, "learning_rate": 9.075858765427287e-06, "loss": 0.873, "step": 6250 }, { "epoch": 0.22079659497812257, "grad_norm": 1.7870632410049438, "learning_rate": 9.075527419587029e-06, "loss": 0.8652, "step": 6251 }, { "epoch": 0.22083191678183045, "grad_norm": 1.5158367156982422, "learning_rate": 9.075196020407033e-06, "loss": 0.8145, "step": 6252 }, { "epoch": 0.22086723858553836, "grad_norm": 1.2300903797149658, "learning_rate": 9.074864567891639e-06, "loss": 0.6232, "step": 6253 }, { "epoch": 0.22090256038924627, "grad_norm": 1.8887418508529663, "learning_rate": 9.074533062045183e-06, "loss": 0.9062, "step": 6254 }, { "epoch": 0.22093788219295418, "grad_norm": 1.7099277973175049, "learning_rate": 9.074201502872004e-06, "loss": 0.8441, "step": 6255 }, { "epoch": 0.2209732039966621, "grad_norm": 3.2439801692962646, "learning_rate": 9.073869890376443e-06, "loss": 0.8365, "step": 6256 }, { "epoch": 0.22100852580037, "grad_norm": 1.6896910667419434, "learning_rate": 9.073538224562837e-06, "loss": 0.8763, "step": 6257 }, { "epoch": 0.22104384760407791, "grad_norm": 1.5811272859573364, "learning_rate": 9.073206505435528e-06, "loss": 0.8357, "step": 6258 }, { "epoch": 0.2210791694077858, "grad_norm": 2.089980125427246, "learning_rate": 9.07287473299886e-06, "loss": 0.8922, "step": 6259 }, { "epoch": 0.2211144912114937, "grad_norm": 2.004399299621582, "learning_rate": 9.072542907257171e-06, "loss": 0.8668, "step": 6260 }, { "epoch": 0.22114981301520162, "grad_norm": 1.8903299570083618, "learning_rate": 9.072211028214807e-06, "loss": 0.8686, "step": 6261 }, { "epoch": 0.22118513481890953, "grad_norm": 1.804645299911499, "learning_rate": 9.071879095876111e-06, "loss": 0.8706, "step": 6262 }, { "epoch": 0.22122045662261744, "grad_norm": 1.822546124458313, "learning_rate": 9.071547110245427e-06, "loss": 0.8596, "step": 6263 }, { "epoch": 0.22125577842632535, "grad_norm": 2.190467596054077, "learning_rate": 9.0712150713271e-06, "loss": 0.8364, "step": 6264 }, { "epoch": 0.22129110023003323, "grad_norm": 1.6064355373382568, "learning_rate": 9.070882979125474e-06, "loss": 0.8618, "step": 6265 }, { "epoch": 0.22132642203374114, "grad_norm": 1.6267331838607788, "learning_rate": 9.070550833644898e-06, "loss": 0.8655, "step": 6266 }, { "epoch": 0.22136174383744905, "grad_norm": 1.6609773635864258, "learning_rate": 9.070218634889716e-06, "loss": 0.8803, "step": 6267 }, { "epoch": 0.22139706564115696, "grad_norm": 1.6530864238739014, "learning_rate": 9.06988638286428e-06, "loss": 0.8406, "step": 6268 }, { "epoch": 0.22143238744486488, "grad_norm": 1.5620867013931274, "learning_rate": 9.069554077572935e-06, "loss": 0.8339, "step": 6269 }, { "epoch": 0.22146770924857279, "grad_norm": 1.6394495964050293, "learning_rate": 9.069221719020032e-06, "loss": 0.8092, "step": 6270 }, { "epoch": 0.2215030310522807, "grad_norm": 1.7059142589569092, "learning_rate": 9.068889307209918e-06, "loss": 0.8589, "step": 6271 }, { "epoch": 0.22153835285598858, "grad_norm": 1.9256279468536377, "learning_rate": 9.068556842146947e-06, "loss": 0.8857, "step": 6272 }, { "epoch": 0.2215736746596965, "grad_norm": 1.4603041410446167, "learning_rate": 9.068224323835471e-06, "loss": 0.6271, "step": 6273 }, { "epoch": 0.2216089964634044, "grad_norm": 1.5699700117111206, "learning_rate": 9.067891752279836e-06, "loss": 0.8207, "step": 6274 }, { "epoch": 0.2216443182671123, "grad_norm": 1.5778448581695557, "learning_rate": 9.0675591274844e-06, "loss": 0.8226, "step": 6275 }, { "epoch": 0.22167964007082022, "grad_norm": 1.715490698814392, "learning_rate": 9.067226449453515e-06, "loss": 0.8558, "step": 6276 }, { "epoch": 0.22171496187452813, "grad_norm": 1.8620339632034302, "learning_rate": 9.066893718191535e-06, "loss": 0.8327, "step": 6277 }, { "epoch": 0.22175028367823602, "grad_norm": 1.709902286529541, "learning_rate": 9.066560933702815e-06, "loss": 0.872, "step": 6278 }, { "epoch": 0.22178560548194393, "grad_norm": 1.6791666746139526, "learning_rate": 9.066228095991708e-06, "loss": 0.8174, "step": 6279 }, { "epoch": 0.22182092728565184, "grad_norm": 1.799142599105835, "learning_rate": 9.065895205062574e-06, "loss": 0.8488, "step": 6280 }, { "epoch": 0.22185624908935975, "grad_norm": 1.7607673406600952, "learning_rate": 9.065562260919768e-06, "loss": 0.8528, "step": 6281 }, { "epoch": 0.22189157089306766, "grad_norm": 1.7129709720611572, "learning_rate": 9.065229263567646e-06, "loss": 0.8783, "step": 6282 }, { "epoch": 0.22192689269677557, "grad_norm": 1.7068637609481812, "learning_rate": 9.064896213010566e-06, "loss": 0.8467, "step": 6283 }, { "epoch": 0.22196221450048348, "grad_norm": 1.7601374387741089, "learning_rate": 9.064563109252891e-06, "loss": 0.8154, "step": 6284 }, { "epoch": 0.22199753630419136, "grad_norm": 1.531456708908081, "learning_rate": 9.064229952298978e-06, "loss": 0.8381, "step": 6285 }, { "epoch": 0.22203285810789927, "grad_norm": 1.6367988586425781, "learning_rate": 9.063896742153186e-06, "loss": 0.8319, "step": 6286 }, { "epoch": 0.22206817991160718, "grad_norm": 1.8096596002578735, "learning_rate": 9.06356347881988e-06, "loss": 0.8426, "step": 6287 }, { "epoch": 0.2221035017153151, "grad_norm": 1.8430957794189453, "learning_rate": 9.063230162303415e-06, "loss": 0.8483, "step": 6288 }, { "epoch": 0.222138823519023, "grad_norm": 1.7583889961242676, "learning_rate": 9.06289679260816e-06, "loss": 0.8101, "step": 6289 }, { "epoch": 0.22217414532273091, "grad_norm": 1.6709359884262085, "learning_rate": 9.062563369738475e-06, "loss": 0.8378, "step": 6290 }, { "epoch": 0.2222094671264388, "grad_norm": 1.6226019859313965, "learning_rate": 9.062229893698727e-06, "loss": 0.8036, "step": 6291 }, { "epoch": 0.2222447889301467, "grad_norm": 1.7379814386367798, "learning_rate": 9.061896364493274e-06, "loss": 0.8422, "step": 6292 }, { "epoch": 0.22228011073385462, "grad_norm": 1.6668715476989746, "learning_rate": 9.061562782126485e-06, "loss": 0.8267, "step": 6293 }, { "epoch": 0.22231543253756253, "grad_norm": 1.6125835180282593, "learning_rate": 9.061229146602728e-06, "loss": 0.8451, "step": 6294 }, { "epoch": 0.22235075434127044, "grad_norm": 1.6134916543960571, "learning_rate": 9.060895457926367e-06, "loss": 0.8548, "step": 6295 }, { "epoch": 0.22238607614497835, "grad_norm": 1.6935020685195923, "learning_rate": 9.060561716101768e-06, "loss": 0.8806, "step": 6296 }, { "epoch": 0.22242139794868626, "grad_norm": 1.7755978107452393, "learning_rate": 9.060227921133303e-06, "loss": 0.8598, "step": 6297 }, { "epoch": 0.22245671975239414, "grad_norm": 1.6397613286972046, "learning_rate": 9.059894073025334e-06, "loss": 0.873, "step": 6298 }, { "epoch": 0.22249204155610205, "grad_norm": 1.68197762966156, "learning_rate": 9.059560171782238e-06, "loss": 0.8603, "step": 6299 }, { "epoch": 0.22252736335980997, "grad_norm": 2.0446584224700928, "learning_rate": 9.059226217408381e-06, "loss": 0.8211, "step": 6300 }, { "epoch": 0.22256268516351788, "grad_norm": 1.6600909233093262, "learning_rate": 9.058892209908134e-06, "loss": 0.8428, "step": 6301 }, { "epoch": 0.2225980069672258, "grad_norm": 1.90259850025177, "learning_rate": 9.058558149285869e-06, "loss": 0.8633, "step": 6302 }, { "epoch": 0.2226333287709337, "grad_norm": 1.7428022623062134, "learning_rate": 9.05822403554596e-06, "loss": 0.8433, "step": 6303 }, { "epoch": 0.2226686505746416, "grad_norm": 1.1195697784423828, "learning_rate": 9.057889868692774e-06, "loss": 0.6349, "step": 6304 }, { "epoch": 0.2227039723783495, "grad_norm": 1.7449190616607666, "learning_rate": 9.05755564873069e-06, "loss": 0.8265, "step": 6305 }, { "epoch": 0.2227392941820574, "grad_norm": 1.7569711208343506, "learning_rate": 9.05722137566408e-06, "loss": 0.8379, "step": 6306 }, { "epoch": 0.2227746159857653, "grad_norm": 1.9582732915878296, "learning_rate": 9.05688704949732e-06, "loss": 0.8457, "step": 6307 }, { "epoch": 0.22280993778947322, "grad_norm": 1.7138640880584717, "learning_rate": 9.056552670234783e-06, "loss": 0.8534, "step": 6308 }, { "epoch": 0.22284525959318113, "grad_norm": 1.7072906494140625, "learning_rate": 9.056218237880849e-06, "loss": 0.8236, "step": 6309 }, { "epoch": 0.22288058139688904, "grad_norm": 1.609576940536499, "learning_rate": 9.055883752439893e-06, "loss": 0.8278, "step": 6310 }, { "epoch": 0.22291590320059693, "grad_norm": 1.635815978050232, "learning_rate": 9.055549213916293e-06, "loss": 0.8376, "step": 6311 }, { "epoch": 0.22295122500430484, "grad_norm": 1.7128102779388428, "learning_rate": 9.055214622314426e-06, "loss": 0.8481, "step": 6312 }, { "epoch": 0.22298654680801275, "grad_norm": 1.731765866279602, "learning_rate": 9.054879977638673e-06, "loss": 0.8663, "step": 6313 }, { "epoch": 0.22302186861172066, "grad_norm": 1.6268582344055176, "learning_rate": 9.054545279893414e-06, "loss": 0.8376, "step": 6314 }, { "epoch": 0.22305719041542857, "grad_norm": 1.5589160919189453, "learning_rate": 9.054210529083028e-06, "loss": 0.835, "step": 6315 }, { "epoch": 0.22309251221913648, "grad_norm": 1.7768809795379639, "learning_rate": 9.053875725211896e-06, "loss": 0.8822, "step": 6316 }, { "epoch": 0.2231278340228444, "grad_norm": 1.5933035612106323, "learning_rate": 9.053540868284401e-06, "loss": 0.8379, "step": 6317 }, { "epoch": 0.22316315582655227, "grad_norm": 1.5850268602371216, "learning_rate": 9.053205958304925e-06, "loss": 0.8524, "step": 6318 }, { "epoch": 0.22319847763026018, "grad_norm": 1.790166974067688, "learning_rate": 9.052870995277852e-06, "loss": 0.8566, "step": 6319 }, { "epoch": 0.2232337994339681, "grad_norm": 1.8039108514785767, "learning_rate": 9.052535979207566e-06, "loss": 0.8589, "step": 6320 }, { "epoch": 0.223269121237676, "grad_norm": 1.6902589797973633, "learning_rate": 9.05220091009845e-06, "loss": 0.8512, "step": 6321 }, { "epoch": 0.22330444304138392, "grad_norm": 1.7164814472198486, "learning_rate": 9.051865787954888e-06, "loss": 0.8355, "step": 6322 }, { "epoch": 0.22333976484509183, "grad_norm": 1.5672380924224854, "learning_rate": 9.05153061278127e-06, "loss": 0.7988, "step": 6323 }, { "epoch": 0.2233750866487997, "grad_norm": 1.590230941772461, "learning_rate": 9.051195384581981e-06, "loss": 0.8481, "step": 6324 }, { "epoch": 0.22341040845250762, "grad_norm": 1.7169057130813599, "learning_rate": 9.05086010336141e-06, "loss": 0.8351, "step": 6325 }, { "epoch": 0.22344573025621553, "grad_norm": 1.7561819553375244, "learning_rate": 9.05052476912394e-06, "loss": 0.8523, "step": 6326 }, { "epoch": 0.22348105205992344, "grad_norm": 1.7053710222244263, "learning_rate": 9.050189381873966e-06, "loss": 0.8359, "step": 6327 }, { "epoch": 0.22351637386363135, "grad_norm": 1.648074984550476, "learning_rate": 9.049853941615874e-06, "loss": 0.9033, "step": 6328 }, { "epoch": 0.22355169566733926, "grad_norm": 1.7687989473342896, "learning_rate": 9.049518448354056e-06, "loss": 0.8531, "step": 6329 }, { "epoch": 0.22358701747104717, "grad_norm": 1.6497012376785278, "learning_rate": 9.0491829020929e-06, "loss": 0.8514, "step": 6330 }, { "epoch": 0.22362233927475506, "grad_norm": 1.4984174966812134, "learning_rate": 9.048847302836799e-06, "loss": 0.8256, "step": 6331 }, { "epoch": 0.22365766107846297, "grad_norm": 2.018608808517456, "learning_rate": 9.048511650590145e-06, "loss": 0.8402, "step": 6332 }, { "epoch": 0.22369298288217088, "grad_norm": 1.8163641691207886, "learning_rate": 9.048175945357336e-06, "loss": 0.8781, "step": 6333 }, { "epoch": 0.2237283046858788, "grad_norm": 1.7078150510787964, "learning_rate": 9.047840187142757e-06, "loss": 0.8541, "step": 6334 }, { "epoch": 0.2237636264895867, "grad_norm": 1.6776432991027832, "learning_rate": 9.047504375950807e-06, "loss": 0.8178, "step": 6335 }, { "epoch": 0.2237989482932946, "grad_norm": 1.5999476909637451, "learning_rate": 9.047168511785882e-06, "loss": 0.8569, "step": 6336 }, { "epoch": 0.2238342700970025, "grad_norm": 1.7459295988082886, "learning_rate": 9.046832594652374e-06, "loss": 0.8628, "step": 6337 }, { "epoch": 0.2238695919007104, "grad_norm": 1.651499629020691, "learning_rate": 9.046496624554685e-06, "loss": 0.8438, "step": 6338 }, { "epoch": 0.2239049137044183, "grad_norm": 1.6550992727279663, "learning_rate": 9.046160601497207e-06, "loss": 0.8524, "step": 6339 }, { "epoch": 0.22394023550812622, "grad_norm": 1.576360821723938, "learning_rate": 9.045824525484339e-06, "loss": 0.869, "step": 6340 }, { "epoch": 0.22397555731183413, "grad_norm": 1.5273239612579346, "learning_rate": 9.04548839652048e-06, "loss": 0.8373, "step": 6341 }, { "epoch": 0.22401087911554204, "grad_norm": 1.704424500465393, "learning_rate": 9.04515221461003e-06, "loss": 0.8441, "step": 6342 }, { "epoch": 0.22404620091924995, "grad_norm": 1.5981693267822266, "learning_rate": 9.04481597975739e-06, "loss": 0.8151, "step": 6343 }, { "epoch": 0.22408152272295784, "grad_norm": 1.6691168546676636, "learning_rate": 9.044479691966958e-06, "loss": 0.8416, "step": 6344 }, { "epoch": 0.22411684452666575, "grad_norm": 2.117208242416382, "learning_rate": 9.044143351243135e-06, "loss": 0.8199, "step": 6345 }, { "epoch": 0.22415216633037366, "grad_norm": 1.729589581489563, "learning_rate": 9.043806957590325e-06, "loss": 0.8082, "step": 6346 }, { "epoch": 0.22418748813408157, "grad_norm": 1.6709206104278564, "learning_rate": 9.043470511012928e-06, "loss": 0.8831, "step": 6347 }, { "epoch": 0.22422280993778948, "grad_norm": 1.6710025072097778, "learning_rate": 9.043134011515351e-06, "loss": 0.8582, "step": 6348 }, { "epoch": 0.2242581317414974, "grad_norm": 1.6396584510803223, "learning_rate": 9.042797459101996e-06, "loss": 0.8151, "step": 6349 }, { "epoch": 0.22429345354520527, "grad_norm": 1.7021225690841675, "learning_rate": 9.042460853777267e-06, "loss": 0.8447, "step": 6350 }, { "epoch": 0.22432877534891318, "grad_norm": 1.6531729698181152, "learning_rate": 9.042124195545569e-06, "loss": 0.8598, "step": 6351 }, { "epoch": 0.2243640971526211, "grad_norm": 1.7366193532943726, "learning_rate": 9.04178748441131e-06, "loss": 0.855, "step": 6352 }, { "epoch": 0.224399418956329, "grad_norm": 1.491532802581787, "learning_rate": 9.041450720378897e-06, "loss": 0.8096, "step": 6353 }, { "epoch": 0.22443474076003692, "grad_norm": 1.772377371788025, "learning_rate": 9.041113903452737e-06, "loss": 0.8528, "step": 6354 }, { "epoch": 0.22447006256374483, "grad_norm": 1.6872798204421997, "learning_rate": 9.040777033637237e-06, "loss": 0.8675, "step": 6355 }, { "epoch": 0.22450538436745274, "grad_norm": 1.6070340871810913, "learning_rate": 9.040440110936808e-06, "loss": 0.8555, "step": 6356 }, { "epoch": 0.22454070617116062, "grad_norm": 1.9278316497802734, "learning_rate": 9.040103135355858e-06, "loss": 0.8877, "step": 6357 }, { "epoch": 0.22457602797486853, "grad_norm": 1.7139782905578613, "learning_rate": 9.039766106898797e-06, "loss": 0.8652, "step": 6358 }, { "epoch": 0.22461134977857644, "grad_norm": 1.6749521493911743, "learning_rate": 9.039429025570035e-06, "loss": 0.8443, "step": 6359 }, { "epoch": 0.22464667158228435, "grad_norm": 1.6443085670471191, "learning_rate": 9.039091891373987e-06, "loss": 0.8528, "step": 6360 }, { "epoch": 0.22468199338599226, "grad_norm": 1.710092544555664, "learning_rate": 9.038754704315065e-06, "loss": 0.8626, "step": 6361 }, { "epoch": 0.22471731518970017, "grad_norm": 1.743911862373352, "learning_rate": 9.038417464397678e-06, "loss": 0.8681, "step": 6362 }, { "epoch": 0.22475263699340806, "grad_norm": 1.9497238397598267, "learning_rate": 9.038080171626244e-06, "loss": 0.8462, "step": 6363 }, { "epoch": 0.22478795879711597, "grad_norm": 1.6931729316711426, "learning_rate": 9.037742826005176e-06, "loss": 0.8171, "step": 6364 }, { "epoch": 0.22482328060082388, "grad_norm": 1.6290185451507568, "learning_rate": 9.037405427538889e-06, "loss": 0.8547, "step": 6365 }, { "epoch": 0.2248586024045318, "grad_norm": 1.7422338724136353, "learning_rate": 9.037067976231797e-06, "loss": 0.8791, "step": 6366 }, { "epoch": 0.2248939242082397, "grad_norm": 1.6675480604171753, "learning_rate": 9.036730472088319e-06, "loss": 0.8418, "step": 6367 }, { "epoch": 0.2249292460119476, "grad_norm": 1.5602948665618896, "learning_rate": 9.036392915112872e-06, "loss": 0.8341, "step": 6368 }, { "epoch": 0.22496456781565552, "grad_norm": 1.5519767999649048, "learning_rate": 9.036055305309871e-06, "loss": 0.8458, "step": 6369 }, { "epoch": 0.2249998896193634, "grad_norm": 1.6801973581314087, "learning_rate": 9.03571764268374e-06, "loss": 0.8507, "step": 6370 }, { "epoch": 0.2250352114230713, "grad_norm": 1.7297351360321045, "learning_rate": 9.035379927238894e-06, "loss": 0.8202, "step": 6371 }, { "epoch": 0.22507053322677922, "grad_norm": 1.638407588005066, "learning_rate": 9.035042158979755e-06, "loss": 0.8166, "step": 6372 }, { "epoch": 0.22510585503048713, "grad_norm": 1.8996306657791138, "learning_rate": 9.034704337910741e-06, "loss": 0.8711, "step": 6373 }, { "epoch": 0.22514117683419504, "grad_norm": 1.6000149250030518, "learning_rate": 9.034366464036275e-06, "loss": 0.809, "step": 6374 }, { "epoch": 0.22517649863790296, "grad_norm": 1.7012306451797485, "learning_rate": 9.03402853736078e-06, "loss": 0.822, "step": 6375 }, { "epoch": 0.22521182044161084, "grad_norm": 1.1128290891647339, "learning_rate": 9.033690557888676e-06, "loss": 0.5649, "step": 6376 }, { "epoch": 0.22524714224531875, "grad_norm": 1.7833302021026611, "learning_rate": 9.033352525624392e-06, "loss": 0.8609, "step": 6377 }, { "epoch": 0.22528246404902666, "grad_norm": 1.869813084602356, "learning_rate": 9.033014440572346e-06, "loss": 0.8524, "step": 6378 }, { "epoch": 0.22531778585273457, "grad_norm": 1.9610061645507812, "learning_rate": 9.032676302736965e-06, "loss": 0.8635, "step": 6379 }, { "epoch": 0.22535310765644248, "grad_norm": 1.7829312086105347, "learning_rate": 9.032338112122675e-06, "loss": 0.8361, "step": 6380 }, { "epoch": 0.2253884294601504, "grad_norm": 1.8422752618789673, "learning_rate": 9.031999868733902e-06, "loss": 0.8454, "step": 6381 }, { "epoch": 0.2254237512638583, "grad_norm": 1.7162399291992188, "learning_rate": 9.031661572575072e-06, "loss": 0.8357, "step": 6382 }, { "epoch": 0.22545907306756618, "grad_norm": 1.861729383468628, "learning_rate": 9.031323223650614e-06, "loss": 0.8387, "step": 6383 }, { "epoch": 0.2254943948712741, "grad_norm": 2.0408904552459717, "learning_rate": 9.030984821964955e-06, "loss": 0.8854, "step": 6384 }, { "epoch": 0.225529716674982, "grad_norm": 1.7871341705322266, "learning_rate": 9.030646367522524e-06, "loss": 0.8511, "step": 6385 }, { "epoch": 0.22556503847868992, "grad_norm": 1.9202790260314941, "learning_rate": 9.03030786032775e-06, "loss": 0.848, "step": 6386 }, { "epoch": 0.22560036028239783, "grad_norm": 1.7127104997634888, "learning_rate": 9.029969300385068e-06, "loss": 0.8409, "step": 6387 }, { "epoch": 0.22563568208610574, "grad_norm": 1.7664319276809692, "learning_rate": 9.029630687698902e-06, "loss": 0.8246, "step": 6388 }, { "epoch": 0.22567100388981362, "grad_norm": 1.6694594621658325, "learning_rate": 9.029292022273688e-06, "loss": 0.8247, "step": 6389 }, { "epoch": 0.22570632569352153, "grad_norm": 1.669121503829956, "learning_rate": 9.028953304113858e-06, "loss": 0.8496, "step": 6390 }, { "epoch": 0.22574164749722944, "grad_norm": 1.8618037700653076, "learning_rate": 9.028614533223846e-06, "loss": 0.8205, "step": 6391 }, { "epoch": 0.22577696930093735, "grad_norm": 1.9271115064620972, "learning_rate": 9.02827570960808e-06, "loss": 0.8266, "step": 6392 }, { "epoch": 0.22581229110464526, "grad_norm": 1.9533839225769043, "learning_rate": 9.027936833271002e-06, "loss": 0.8254, "step": 6393 }, { "epoch": 0.22584761290835317, "grad_norm": 2.0605039596557617, "learning_rate": 9.027597904217043e-06, "loss": 0.8505, "step": 6394 }, { "epoch": 0.22588293471206108, "grad_norm": 1.7914007902145386, "learning_rate": 9.027258922450639e-06, "loss": 0.8186, "step": 6395 }, { "epoch": 0.22591825651576897, "grad_norm": 1.7109967470169067, "learning_rate": 9.026919887976228e-06, "loss": 0.8429, "step": 6396 }, { "epoch": 0.22595357831947688, "grad_norm": 1.9101307392120361, "learning_rate": 9.026580800798246e-06, "loss": 0.8586, "step": 6397 }, { "epoch": 0.2259889001231848, "grad_norm": 3.1250710487365723, "learning_rate": 9.026241660921134e-06, "loss": 0.8439, "step": 6398 }, { "epoch": 0.2260242219268927, "grad_norm": 2.0211386680603027, "learning_rate": 9.025902468349326e-06, "loss": 0.8587, "step": 6399 }, { "epoch": 0.2260595437306006, "grad_norm": 2.166304349899292, "learning_rate": 9.025563223087263e-06, "loss": 0.8238, "step": 6400 }, { "epoch": 0.22609486553430852, "grad_norm": 1.6350988149642944, "learning_rate": 9.025223925139384e-06, "loss": 0.8025, "step": 6401 }, { "epoch": 0.2261301873380164, "grad_norm": 1.5885975360870361, "learning_rate": 9.024884574510134e-06, "loss": 0.8464, "step": 6402 }, { "epoch": 0.2261655091417243, "grad_norm": 1.889682412147522, "learning_rate": 9.024545171203949e-06, "loss": 0.8433, "step": 6403 }, { "epoch": 0.22620083094543222, "grad_norm": 1.7168455123901367, "learning_rate": 9.024205715225274e-06, "loss": 0.8307, "step": 6404 }, { "epoch": 0.22623615274914013, "grad_norm": 1.746936321258545, "learning_rate": 9.023866206578551e-06, "loss": 0.8273, "step": 6405 }, { "epoch": 0.22627147455284805, "grad_norm": 1.6541651487350464, "learning_rate": 9.023526645268224e-06, "loss": 0.8364, "step": 6406 }, { "epoch": 0.22630679635655596, "grad_norm": 1.8750414848327637, "learning_rate": 9.023187031298736e-06, "loss": 0.8654, "step": 6407 }, { "epoch": 0.22634211816026387, "grad_norm": 2.0490927696228027, "learning_rate": 9.022847364674531e-06, "loss": 0.877, "step": 6408 }, { "epoch": 0.22637743996397175, "grad_norm": 1.6527140140533447, "learning_rate": 9.022507645400059e-06, "loss": 0.8498, "step": 6409 }, { "epoch": 0.22641276176767966, "grad_norm": 1.6550558805465698, "learning_rate": 9.022167873479762e-06, "loss": 0.8273, "step": 6410 }, { "epoch": 0.22644808357138757, "grad_norm": 1.885295033454895, "learning_rate": 9.021828048918086e-06, "loss": 0.8866, "step": 6411 }, { "epoch": 0.22648340537509548, "grad_norm": 1.7985162734985352, "learning_rate": 9.021488171719483e-06, "loss": 0.841, "step": 6412 }, { "epoch": 0.2265187271788034, "grad_norm": 1.8999656438827515, "learning_rate": 9.021148241888399e-06, "loss": 0.8332, "step": 6413 }, { "epoch": 0.2265540489825113, "grad_norm": 1.6727763414382935, "learning_rate": 9.02080825942928e-06, "loss": 0.8873, "step": 6414 }, { "epoch": 0.22658937078621919, "grad_norm": 2.1170260906219482, "learning_rate": 9.02046822434658e-06, "loss": 0.864, "step": 6415 }, { "epoch": 0.2266246925899271, "grad_norm": 1.7656457424163818, "learning_rate": 9.020128136644748e-06, "loss": 0.8524, "step": 6416 }, { "epoch": 0.226660014393635, "grad_norm": 2.286166191101074, "learning_rate": 9.019787996328233e-06, "loss": 0.8537, "step": 6417 }, { "epoch": 0.22669533619734292, "grad_norm": 1.6532131433486938, "learning_rate": 9.01944780340149e-06, "loss": 0.8181, "step": 6418 }, { "epoch": 0.22673065800105083, "grad_norm": 1.7868903875350952, "learning_rate": 9.01910755786897e-06, "loss": 0.8564, "step": 6419 }, { "epoch": 0.22676597980475874, "grad_norm": 1.7676805257797241, "learning_rate": 9.018767259735124e-06, "loss": 0.8325, "step": 6420 }, { "epoch": 0.22680130160846665, "grad_norm": 1.7275221347808838, "learning_rate": 9.01842690900441e-06, "loss": 0.8467, "step": 6421 }, { "epoch": 0.22683662341217453, "grad_norm": 2.129570722579956, "learning_rate": 9.018086505681278e-06, "loss": 0.8574, "step": 6422 }, { "epoch": 0.22687194521588244, "grad_norm": 1.7711451053619385, "learning_rate": 9.017746049770185e-06, "loss": 0.8378, "step": 6423 }, { "epoch": 0.22690726701959035, "grad_norm": 1.7736369371414185, "learning_rate": 9.017405541275588e-06, "loss": 0.837, "step": 6424 }, { "epoch": 0.22694258882329826, "grad_norm": 1.673731803894043, "learning_rate": 9.017064980201941e-06, "loss": 0.8345, "step": 6425 }, { "epoch": 0.22697791062700617, "grad_norm": 1.5283136367797852, "learning_rate": 9.016724366553705e-06, "loss": 0.7868, "step": 6426 }, { "epoch": 0.22701323243071408, "grad_norm": 1.80222487449646, "learning_rate": 9.016383700335334e-06, "loss": 0.8408, "step": 6427 }, { "epoch": 0.22704855423442197, "grad_norm": 1.8585032224655151, "learning_rate": 9.016042981551288e-06, "loss": 0.8343, "step": 6428 }, { "epoch": 0.22708387603812988, "grad_norm": 1.8361295461654663, "learning_rate": 9.015702210206028e-06, "loss": 0.8579, "step": 6429 }, { "epoch": 0.2271191978418378, "grad_norm": 1.5627517700195312, "learning_rate": 9.01536138630401e-06, "loss": 0.8032, "step": 6430 }, { "epoch": 0.2271545196455457, "grad_norm": 1.7139631509780884, "learning_rate": 9.0150205098497e-06, "loss": 0.8594, "step": 6431 }, { "epoch": 0.2271898414492536, "grad_norm": 1.6284939050674438, "learning_rate": 9.014679580847552e-06, "loss": 0.8404, "step": 6432 }, { "epoch": 0.22722516325296152, "grad_norm": 1.6921169757843018, "learning_rate": 9.014338599302034e-06, "loss": 0.8604, "step": 6433 }, { "epoch": 0.22726048505666943, "grad_norm": 1.6937135457992554, "learning_rate": 9.013997565217609e-06, "loss": 0.8427, "step": 6434 }, { "epoch": 0.22729580686037731, "grad_norm": 1.690503716468811, "learning_rate": 9.013656478598738e-06, "loss": 0.8523, "step": 6435 }, { "epoch": 0.22733112866408522, "grad_norm": 1.8776580095291138, "learning_rate": 9.013315339449883e-06, "loss": 0.8173, "step": 6436 }, { "epoch": 0.22736645046779314, "grad_norm": 1.859116554260254, "learning_rate": 9.012974147775515e-06, "loss": 0.8382, "step": 6437 }, { "epoch": 0.22740177227150105, "grad_norm": 1.722447156906128, "learning_rate": 9.012632903580093e-06, "loss": 0.8402, "step": 6438 }, { "epoch": 0.22743709407520896, "grad_norm": 1.7585575580596924, "learning_rate": 9.012291606868087e-06, "loss": 0.8512, "step": 6439 }, { "epoch": 0.22747241587891687, "grad_norm": 1.725915551185608, "learning_rate": 9.011950257643962e-06, "loss": 0.8331, "step": 6440 }, { "epoch": 0.22750773768262475, "grad_norm": 1.8545957803726196, "learning_rate": 9.011608855912186e-06, "loss": 0.8887, "step": 6441 }, { "epoch": 0.22754305948633266, "grad_norm": 1.7344460487365723, "learning_rate": 9.011267401677228e-06, "loss": 0.8257, "step": 6442 }, { "epoch": 0.22757838129004057, "grad_norm": 1.9028412103652954, "learning_rate": 9.010925894943557e-06, "loss": 0.8072, "step": 6443 }, { "epoch": 0.22761370309374848, "grad_norm": 2.4507429599761963, "learning_rate": 9.01058433571564e-06, "loss": 0.7996, "step": 6444 }, { "epoch": 0.2276490248974564, "grad_norm": 1.8511872291564941, "learning_rate": 9.010242723997951e-06, "loss": 0.8725, "step": 6445 }, { "epoch": 0.2276843467011643, "grad_norm": 1.5635828971862793, "learning_rate": 9.009901059794957e-06, "loss": 0.8535, "step": 6446 }, { "epoch": 0.2277196685048722, "grad_norm": 1.6421213150024414, "learning_rate": 9.009559343111133e-06, "loss": 0.814, "step": 6447 }, { "epoch": 0.2277549903085801, "grad_norm": 1.836290955543518, "learning_rate": 9.00921757395095e-06, "loss": 0.8427, "step": 6448 }, { "epoch": 0.227790312112288, "grad_norm": 1.7714340686798096, "learning_rate": 9.008875752318881e-06, "loss": 0.8479, "step": 6449 }, { "epoch": 0.22782563391599592, "grad_norm": 1.0257511138916016, "learning_rate": 9.0085338782194e-06, "loss": 0.6104, "step": 6450 }, { "epoch": 0.22786095571970383, "grad_norm": 2.7857182025909424, "learning_rate": 9.00819195165698e-06, "loss": 0.8759, "step": 6451 }, { "epoch": 0.22789627752341174, "grad_norm": 1.699385166168213, "learning_rate": 9.007849972636098e-06, "loss": 0.8228, "step": 6452 }, { "epoch": 0.22793159932711965, "grad_norm": 1.6853712797164917, "learning_rate": 9.007507941161229e-06, "loss": 0.8786, "step": 6453 }, { "epoch": 0.22796692113082753, "grad_norm": 1.980105996131897, "learning_rate": 9.00716585723685e-06, "loss": 0.8671, "step": 6454 }, { "epoch": 0.22800224293453544, "grad_norm": 1.6990412473678589, "learning_rate": 9.006823720867437e-06, "loss": 0.841, "step": 6455 }, { "epoch": 0.22803756473824335, "grad_norm": 1.7662975788116455, "learning_rate": 9.006481532057467e-06, "loss": 0.8924, "step": 6456 }, { "epoch": 0.22807288654195126, "grad_norm": 1.8474466800689697, "learning_rate": 9.006139290811422e-06, "loss": 0.8213, "step": 6457 }, { "epoch": 0.22810820834565917, "grad_norm": 1.8200311660766602, "learning_rate": 9.005796997133777e-06, "loss": 0.8301, "step": 6458 }, { "epoch": 0.22814353014936709, "grad_norm": 1.8937907218933105, "learning_rate": 9.005454651029015e-06, "loss": 0.8443, "step": 6459 }, { "epoch": 0.228178851953075, "grad_norm": 1.8181123733520508, "learning_rate": 9.005112252501615e-06, "loss": 0.8429, "step": 6460 }, { "epoch": 0.22821417375678288, "grad_norm": 1.815090298652649, "learning_rate": 9.00476980155606e-06, "loss": 0.8706, "step": 6461 }, { "epoch": 0.2282494955604908, "grad_norm": 1.879004716873169, "learning_rate": 9.004427298196831e-06, "loss": 0.8237, "step": 6462 }, { "epoch": 0.2282848173641987, "grad_norm": 1.828427791595459, "learning_rate": 9.004084742428409e-06, "loss": 0.8319, "step": 6463 }, { "epoch": 0.2283201391679066, "grad_norm": 1.684997797012329, "learning_rate": 9.003742134255278e-06, "loss": 0.8279, "step": 6464 }, { "epoch": 0.22835546097161452, "grad_norm": 1.9723471403121948, "learning_rate": 9.003399473681924e-06, "loss": 0.8935, "step": 6465 }, { "epoch": 0.22839078277532243, "grad_norm": 1.8882302045822144, "learning_rate": 9.003056760712829e-06, "loss": 0.8724, "step": 6466 }, { "epoch": 0.22842610457903031, "grad_norm": 1.6095112562179565, "learning_rate": 9.00271399535248e-06, "loss": 0.8533, "step": 6467 }, { "epoch": 0.22846142638273823, "grad_norm": 1.780703067779541, "learning_rate": 9.002371177605363e-06, "loss": 0.8821, "step": 6468 }, { "epoch": 0.22849674818644614, "grad_norm": 1.6878196001052856, "learning_rate": 9.002028307475963e-06, "loss": 0.8542, "step": 6469 }, { "epoch": 0.22853206999015405, "grad_norm": 1.9410045146942139, "learning_rate": 9.00168538496877e-06, "loss": 0.8508, "step": 6470 }, { "epoch": 0.22856739179386196, "grad_norm": 1.901746392250061, "learning_rate": 9.001342410088272e-06, "loss": 0.9098, "step": 6471 }, { "epoch": 0.22860271359756987, "grad_norm": 1.766270637512207, "learning_rate": 9.000999382838956e-06, "loss": 0.867, "step": 6472 }, { "epoch": 0.22863803540127778, "grad_norm": 1.7172019481658936, "learning_rate": 9.00065630322531e-06, "loss": 0.8699, "step": 6473 }, { "epoch": 0.22867335720498566, "grad_norm": 2.5137579441070557, "learning_rate": 9.000313171251828e-06, "loss": 0.8211, "step": 6474 }, { "epoch": 0.22870867900869357, "grad_norm": 1.7126895189285278, "learning_rate": 8.999969986923e-06, "loss": 0.8196, "step": 6475 }, { "epoch": 0.22874400081240148, "grad_norm": 1.685591697692871, "learning_rate": 8.999626750243316e-06, "loss": 0.8364, "step": 6476 }, { "epoch": 0.2287793226161094, "grad_norm": 1.836168646812439, "learning_rate": 8.99928346121727e-06, "loss": 0.8524, "step": 6477 }, { "epoch": 0.2288146444198173, "grad_norm": 1.8334685564041138, "learning_rate": 8.998940119849352e-06, "loss": 0.8537, "step": 6478 }, { "epoch": 0.22884996622352521, "grad_norm": 1.7597407102584839, "learning_rate": 8.998596726144057e-06, "loss": 0.8266, "step": 6479 }, { "epoch": 0.2288852880272331, "grad_norm": 1.7378339767456055, "learning_rate": 8.998253280105882e-06, "loss": 0.8418, "step": 6480 }, { "epoch": 0.228920609830941, "grad_norm": 1.8338600397109985, "learning_rate": 8.997909781739318e-06, "loss": 0.856, "step": 6481 }, { "epoch": 0.22895593163464892, "grad_norm": 1.686698317527771, "learning_rate": 8.997566231048864e-06, "loss": 0.8501, "step": 6482 }, { "epoch": 0.22899125343835683, "grad_norm": 1.8787448406219482, "learning_rate": 8.997222628039012e-06, "loss": 0.8081, "step": 6483 }, { "epoch": 0.22902657524206474, "grad_norm": 3.0478808879852295, "learning_rate": 8.996878972714265e-06, "loss": 0.8658, "step": 6484 }, { "epoch": 0.22906189704577265, "grad_norm": 1.7363842725753784, "learning_rate": 8.996535265079115e-06, "loss": 0.834, "step": 6485 }, { "epoch": 0.22909721884948056, "grad_norm": 1.6939592361450195, "learning_rate": 8.996191505138063e-06, "loss": 0.8938, "step": 6486 }, { "epoch": 0.22913254065318844, "grad_norm": 1.6425398588180542, "learning_rate": 8.995847692895607e-06, "loss": 0.8709, "step": 6487 }, { "epoch": 0.22916786245689635, "grad_norm": 1.8221009969711304, "learning_rate": 8.995503828356249e-06, "loss": 0.8158, "step": 6488 }, { "epoch": 0.22920318426060426, "grad_norm": 1.8906376361846924, "learning_rate": 8.995159911524487e-06, "loss": 0.8604, "step": 6489 }, { "epoch": 0.22923850606431218, "grad_norm": 1.5800645351409912, "learning_rate": 8.994815942404824e-06, "loss": 0.8126, "step": 6490 }, { "epoch": 0.22927382786802009, "grad_norm": 1.5996990203857422, "learning_rate": 8.994471921001762e-06, "loss": 0.7997, "step": 6491 }, { "epoch": 0.229309149671728, "grad_norm": 1.7211719751358032, "learning_rate": 8.9941278473198e-06, "loss": 0.8195, "step": 6492 }, { "epoch": 0.22934447147543588, "grad_norm": 1.7139884233474731, "learning_rate": 8.993783721363443e-06, "loss": 0.8455, "step": 6493 }, { "epoch": 0.2293797932791438, "grad_norm": 1.7737314701080322, "learning_rate": 8.993439543137198e-06, "loss": 0.8754, "step": 6494 }, { "epoch": 0.2294151150828517, "grad_norm": 1.6199238300323486, "learning_rate": 8.993095312645565e-06, "loss": 0.8482, "step": 6495 }, { "epoch": 0.2294504368865596, "grad_norm": 2.033175230026245, "learning_rate": 8.992751029893052e-06, "loss": 0.8495, "step": 6496 }, { "epoch": 0.22948575869026752, "grad_norm": 1.8555543422698975, "learning_rate": 8.992406694884163e-06, "loss": 0.8569, "step": 6497 }, { "epoch": 0.22952108049397543, "grad_norm": 1.7642865180969238, "learning_rate": 8.992062307623407e-06, "loss": 0.8672, "step": 6498 }, { "epoch": 0.22955640229768334, "grad_norm": 1.8157943487167358, "learning_rate": 8.991717868115291e-06, "loss": 0.8443, "step": 6499 }, { "epoch": 0.22959172410139123, "grad_norm": 1.648483157157898, "learning_rate": 8.99137337636432e-06, "loss": 0.8032, "step": 6500 }, { "epoch": 0.22962704590509914, "grad_norm": 1.7406837940216064, "learning_rate": 8.991028832375006e-06, "loss": 0.8389, "step": 6501 }, { "epoch": 0.22966236770880705, "grad_norm": 1.5122780799865723, "learning_rate": 8.990684236151855e-06, "loss": 0.8186, "step": 6502 }, { "epoch": 0.22969768951251496, "grad_norm": 1.6927943229675293, "learning_rate": 8.99033958769938e-06, "loss": 0.8155, "step": 6503 }, { "epoch": 0.22973301131622287, "grad_norm": 1.6353869438171387, "learning_rate": 8.989994887022092e-06, "loss": 0.7808, "step": 6504 }, { "epoch": 0.22976833311993078, "grad_norm": 1.7180123329162598, "learning_rate": 8.989650134124497e-06, "loss": 0.8596, "step": 6505 }, { "epoch": 0.2298036549236387, "grad_norm": 1.6229305267333984, "learning_rate": 8.989305329011115e-06, "loss": 0.8414, "step": 6506 }, { "epoch": 0.22983897672734657, "grad_norm": 1.7216053009033203, "learning_rate": 8.988960471686452e-06, "loss": 0.83, "step": 6507 }, { "epoch": 0.22987429853105448, "grad_norm": 1.7007728815078735, "learning_rate": 8.988615562155025e-06, "loss": 0.8397, "step": 6508 }, { "epoch": 0.2299096203347624, "grad_norm": 1.6334013938903809, "learning_rate": 8.988270600421349e-06, "loss": 0.8227, "step": 6509 }, { "epoch": 0.2299449421384703, "grad_norm": 1.7889348268508911, "learning_rate": 8.987925586489935e-06, "loss": 0.8091, "step": 6510 }, { "epoch": 0.22998026394217821, "grad_norm": 1.702196717262268, "learning_rate": 8.987580520365301e-06, "loss": 0.8647, "step": 6511 }, { "epoch": 0.23001558574588613, "grad_norm": 1.8039450645446777, "learning_rate": 8.987235402051964e-06, "loss": 0.8604, "step": 6512 }, { "epoch": 0.230050907549594, "grad_norm": 1.9757413864135742, "learning_rate": 8.986890231554437e-06, "loss": 0.8695, "step": 6513 }, { "epoch": 0.23008622935330192, "grad_norm": 1.7540909051895142, "learning_rate": 8.986545008877242e-06, "loss": 0.8445, "step": 6514 }, { "epoch": 0.23012155115700983, "grad_norm": 1.1864659786224365, "learning_rate": 8.986199734024896e-06, "loss": 0.5967, "step": 6515 }, { "epoch": 0.23015687296071774, "grad_norm": 1.9935835599899292, "learning_rate": 8.985854407001915e-06, "loss": 0.8554, "step": 6516 }, { "epoch": 0.23019219476442565, "grad_norm": 1.9148447513580322, "learning_rate": 8.985509027812822e-06, "loss": 0.8407, "step": 6517 }, { "epoch": 0.23022751656813356, "grad_norm": 1.8918452262878418, "learning_rate": 8.985163596462137e-06, "loss": 0.8174, "step": 6518 }, { "epoch": 0.23026283837184147, "grad_norm": 1.6880125999450684, "learning_rate": 8.98481811295438e-06, "loss": 0.8259, "step": 6519 }, { "epoch": 0.23029816017554935, "grad_norm": 1.8223611116409302, "learning_rate": 8.98447257729407e-06, "loss": 0.8748, "step": 6520 }, { "epoch": 0.23033348197925727, "grad_norm": 1.0847822427749634, "learning_rate": 8.984126989485732e-06, "loss": 0.6203, "step": 6521 }, { "epoch": 0.23036880378296518, "grad_norm": 2.047630786895752, "learning_rate": 8.983781349533892e-06, "loss": 0.8944, "step": 6522 }, { "epoch": 0.2304041255866731, "grad_norm": 1.799155354499817, "learning_rate": 8.983435657443069e-06, "loss": 0.8578, "step": 6523 }, { "epoch": 0.230439447390381, "grad_norm": 1.7385553121566772, "learning_rate": 8.983089913217788e-06, "loss": 0.8369, "step": 6524 }, { "epoch": 0.2304747691940889, "grad_norm": 1.645531415939331, "learning_rate": 8.982744116862576e-06, "loss": 0.8755, "step": 6525 }, { "epoch": 0.2305100909977968, "grad_norm": 1.747751235961914, "learning_rate": 8.982398268381957e-06, "loss": 0.8339, "step": 6526 }, { "epoch": 0.2305454128015047, "grad_norm": 1.9195390939712524, "learning_rate": 8.982052367780461e-06, "loss": 0.838, "step": 6527 }, { "epoch": 0.2305807346052126, "grad_norm": 2.022491931915283, "learning_rate": 8.981706415062608e-06, "loss": 0.8501, "step": 6528 }, { "epoch": 0.23061605640892052, "grad_norm": 1.5473418235778809, "learning_rate": 8.981360410232933e-06, "loss": 0.825, "step": 6529 }, { "epoch": 0.23065137821262843, "grad_norm": 2.0058727264404297, "learning_rate": 8.98101435329596e-06, "loss": 0.8291, "step": 6530 }, { "epoch": 0.23068670001633634, "grad_norm": 1.7259937524795532, "learning_rate": 8.980668244256219e-06, "loss": 0.8323, "step": 6531 }, { "epoch": 0.23072202182004425, "grad_norm": 1.812644600868225, "learning_rate": 8.98032208311824e-06, "loss": 0.8334, "step": 6532 }, { "epoch": 0.23075734362375214, "grad_norm": 1.9648081064224243, "learning_rate": 8.979975869886554e-06, "loss": 0.8543, "step": 6533 }, { "epoch": 0.23079266542746005, "grad_norm": 1.7909767627716064, "learning_rate": 8.979629604565694e-06, "loss": 0.7989, "step": 6534 }, { "epoch": 0.23082798723116796, "grad_norm": 1.7256759405136108, "learning_rate": 8.979283287160187e-06, "loss": 0.863, "step": 6535 }, { "epoch": 0.23086330903487587, "grad_norm": 2.449711799621582, "learning_rate": 8.978936917674572e-06, "loss": 0.8836, "step": 6536 }, { "epoch": 0.23089863083858378, "grad_norm": 1.5464222431182861, "learning_rate": 8.978590496113376e-06, "loss": 0.8367, "step": 6537 }, { "epoch": 0.2309339526422917, "grad_norm": 1.649619221687317, "learning_rate": 8.978244022481136e-06, "loss": 0.807, "step": 6538 }, { "epoch": 0.23096927444599957, "grad_norm": 1.870128870010376, "learning_rate": 8.977897496782385e-06, "loss": 0.8886, "step": 6539 }, { "epoch": 0.23100459624970748, "grad_norm": 1.891660213470459, "learning_rate": 8.977550919021661e-06, "loss": 0.8697, "step": 6540 }, { "epoch": 0.2310399180534154, "grad_norm": 1.6505889892578125, "learning_rate": 8.977204289203496e-06, "loss": 0.838, "step": 6541 }, { "epoch": 0.2310752398571233, "grad_norm": 1.633586049079895, "learning_rate": 8.976857607332431e-06, "loss": 0.8314, "step": 6542 }, { "epoch": 0.23111056166083122, "grad_norm": 1.7755221128463745, "learning_rate": 8.976510873413001e-06, "loss": 0.8698, "step": 6543 }, { "epoch": 0.23114588346453913, "grad_norm": 1.5804150104522705, "learning_rate": 8.976164087449744e-06, "loss": 0.8279, "step": 6544 }, { "epoch": 0.23118120526824704, "grad_norm": 1.7002183198928833, "learning_rate": 8.975817249447198e-06, "loss": 0.851, "step": 6545 }, { "epoch": 0.23121652707195492, "grad_norm": 1.6892859935760498, "learning_rate": 8.975470359409903e-06, "loss": 0.8633, "step": 6546 }, { "epoch": 0.23125184887566283, "grad_norm": 1.7407130002975464, "learning_rate": 8.9751234173424e-06, "loss": 0.8111, "step": 6547 }, { "epoch": 0.23128717067937074, "grad_norm": 2.0070242881774902, "learning_rate": 8.97477642324923e-06, "loss": 0.8378, "step": 6548 }, { "epoch": 0.23132249248307865, "grad_norm": 1.1166306734085083, "learning_rate": 8.974429377134933e-06, "loss": 0.62, "step": 6549 }, { "epoch": 0.23135781428678656, "grad_norm": 2.013978958129883, "learning_rate": 8.97408227900405e-06, "loss": 0.8379, "step": 6550 }, { "epoch": 0.23139313609049447, "grad_norm": 1.8498748540878296, "learning_rate": 8.973735128861126e-06, "loss": 0.8704, "step": 6551 }, { "epoch": 0.23142845789420236, "grad_norm": 1.9270603656768799, "learning_rate": 8.973387926710703e-06, "loss": 0.8324, "step": 6552 }, { "epoch": 0.23146377969791027, "grad_norm": 1.9529850482940674, "learning_rate": 8.973040672557328e-06, "loss": 0.8421, "step": 6553 }, { "epoch": 0.23149910150161818, "grad_norm": 2.111978530883789, "learning_rate": 8.97269336640554e-06, "loss": 0.833, "step": 6554 }, { "epoch": 0.2315344233053261, "grad_norm": 1.7308744192123413, "learning_rate": 8.97234600825989e-06, "loss": 0.8581, "step": 6555 }, { "epoch": 0.231569745109034, "grad_norm": 1.7346636056900024, "learning_rate": 8.971998598124922e-06, "loss": 0.8445, "step": 6556 }, { "epoch": 0.2316050669127419, "grad_norm": 1.7124205827713013, "learning_rate": 8.971651136005184e-06, "loss": 0.8522, "step": 6557 }, { "epoch": 0.23164038871644982, "grad_norm": 1.7454718351364136, "learning_rate": 8.97130362190522e-06, "loss": 0.8678, "step": 6558 }, { "epoch": 0.2316757105201577, "grad_norm": 2.065434694290161, "learning_rate": 8.970956055829584e-06, "loss": 0.8347, "step": 6559 }, { "epoch": 0.2317110323238656, "grad_norm": 1.8844432830810547, "learning_rate": 8.97060843778282e-06, "loss": 0.8758, "step": 6560 }, { "epoch": 0.23174635412757352, "grad_norm": 1.792781949043274, "learning_rate": 8.97026076776948e-06, "loss": 0.8469, "step": 6561 }, { "epoch": 0.23178167593128143, "grad_norm": 1.9173917770385742, "learning_rate": 8.969913045794112e-06, "loss": 0.8362, "step": 6562 }, { "epoch": 0.23181699773498934, "grad_norm": 1.685705304145813, "learning_rate": 8.969565271861268e-06, "loss": 0.8516, "step": 6563 }, { "epoch": 0.23185231953869725, "grad_norm": 1.9461299180984497, "learning_rate": 8.9692174459755e-06, "loss": 0.8413, "step": 6564 }, { "epoch": 0.23188764134240514, "grad_norm": 1.869163155555725, "learning_rate": 8.968869568141361e-06, "loss": 0.8505, "step": 6565 }, { "epoch": 0.23192296314611305, "grad_norm": 1.8057200908660889, "learning_rate": 8.968521638363404e-06, "loss": 0.8217, "step": 6566 }, { "epoch": 0.23195828494982096, "grad_norm": 1.6797624826431274, "learning_rate": 8.96817365664618e-06, "loss": 0.8584, "step": 6567 }, { "epoch": 0.23199360675352887, "grad_norm": 1.5956978797912598, "learning_rate": 8.967825622994246e-06, "loss": 0.8691, "step": 6568 }, { "epoch": 0.23202892855723678, "grad_norm": 1.86985445022583, "learning_rate": 8.967477537412156e-06, "loss": 0.8401, "step": 6569 }, { "epoch": 0.2320642503609447, "grad_norm": 2.148958444595337, "learning_rate": 8.967129399904467e-06, "loss": 0.8609, "step": 6570 }, { "epoch": 0.2320995721646526, "grad_norm": 1.789799451828003, "learning_rate": 8.966781210475732e-06, "loss": 0.8806, "step": 6571 }, { "epoch": 0.23213489396836048, "grad_norm": 1.7815495729446411, "learning_rate": 8.966432969130512e-06, "loss": 0.8659, "step": 6572 }, { "epoch": 0.2321702157720684, "grad_norm": 1.7068119049072266, "learning_rate": 8.96608467587336e-06, "loss": 0.8309, "step": 6573 }, { "epoch": 0.2322055375757763, "grad_norm": 1.0875366926193237, "learning_rate": 8.96573633070884e-06, "loss": 0.6031, "step": 6574 }, { "epoch": 0.23224085937948422, "grad_norm": 1.856000542640686, "learning_rate": 8.965387933641509e-06, "loss": 0.863, "step": 6575 }, { "epoch": 0.23227618118319213, "grad_norm": 1.9609460830688477, "learning_rate": 8.965039484675923e-06, "loss": 0.8676, "step": 6576 }, { "epoch": 0.23231150298690004, "grad_norm": 1.9620651006698608, "learning_rate": 8.964690983816649e-06, "loss": 0.8709, "step": 6577 }, { "epoch": 0.23234682479060792, "grad_norm": 2.067490816116333, "learning_rate": 8.964342431068243e-06, "loss": 0.84, "step": 6578 }, { "epoch": 0.23238214659431583, "grad_norm": 1.5050837993621826, "learning_rate": 8.96399382643527e-06, "loss": 0.819, "step": 6579 }, { "epoch": 0.23241746839802374, "grad_norm": 1.7863647937774658, "learning_rate": 8.963645169922288e-06, "loss": 0.8615, "step": 6580 }, { "epoch": 0.23245279020173165, "grad_norm": 1.878704309463501, "learning_rate": 8.963296461533865e-06, "loss": 0.8183, "step": 6581 }, { "epoch": 0.23248811200543956, "grad_norm": 1.9077640771865845, "learning_rate": 8.962947701274563e-06, "loss": 0.8257, "step": 6582 }, { "epoch": 0.23252343380914747, "grad_norm": 1.7538032531738281, "learning_rate": 8.962598889148946e-06, "loss": 0.8234, "step": 6583 }, { "epoch": 0.23255875561285538, "grad_norm": 1.6902728080749512, "learning_rate": 8.96225002516158e-06, "loss": 0.8385, "step": 6584 }, { "epoch": 0.23259407741656327, "grad_norm": 1.8421398401260376, "learning_rate": 8.96190110931703e-06, "loss": 0.8651, "step": 6585 }, { "epoch": 0.23262939922027118, "grad_norm": 1.7731385231018066, "learning_rate": 8.961552141619863e-06, "loss": 0.8584, "step": 6586 }, { "epoch": 0.2326647210239791, "grad_norm": 1.8236019611358643, "learning_rate": 8.961203122074647e-06, "loss": 0.8659, "step": 6587 }, { "epoch": 0.232700042827687, "grad_norm": 1.9897253513336182, "learning_rate": 8.960854050685947e-06, "loss": 0.8316, "step": 6588 }, { "epoch": 0.2327353646313949, "grad_norm": 1.7944920063018799, "learning_rate": 8.960504927458335e-06, "loss": 0.8435, "step": 6589 }, { "epoch": 0.23277068643510282, "grad_norm": 1.8403549194335938, "learning_rate": 8.96015575239638e-06, "loss": 0.8562, "step": 6590 }, { "epoch": 0.2328060082388107, "grad_norm": 2.09149169921875, "learning_rate": 8.959806525504651e-06, "loss": 0.8624, "step": 6591 }, { "epoch": 0.2328413300425186, "grad_norm": 1.8185654878616333, "learning_rate": 8.959457246787718e-06, "loss": 0.8377, "step": 6592 }, { "epoch": 0.23287665184622652, "grad_norm": 1.81169855594635, "learning_rate": 8.959107916250153e-06, "loss": 0.8611, "step": 6593 }, { "epoch": 0.23291197364993443, "grad_norm": 1.9309775829315186, "learning_rate": 8.958758533896528e-06, "loss": 0.8727, "step": 6594 }, { "epoch": 0.23294729545364234, "grad_norm": 2.122495412826538, "learning_rate": 8.958409099731414e-06, "loss": 0.8676, "step": 6595 }, { "epoch": 0.23298261725735026, "grad_norm": 1.7103475332260132, "learning_rate": 8.958059613759387e-06, "loss": 0.8343, "step": 6596 }, { "epoch": 0.23301793906105817, "grad_norm": 1.6669520139694214, "learning_rate": 8.95771007598502e-06, "loss": 0.844, "step": 6597 }, { "epoch": 0.23305326086476605, "grad_norm": 1.8865585327148438, "learning_rate": 8.957360486412887e-06, "loss": 0.8339, "step": 6598 }, { "epoch": 0.23308858266847396, "grad_norm": 1.9913617372512817, "learning_rate": 8.957010845047565e-06, "loss": 0.8511, "step": 6599 }, { "epoch": 0.23312390447218187, "grad_norm": 2.2000958919525146, "learning_rate": 8.956661151893626e-06, "loss": 0.8329, "step": 6600 }, { "epoch": 0.23315922627588978, "grad_norm": 2.0020530223846436, "learning_rate": 8.956311406955652e-06, "loss": 0.8367, "step": 6601 }, { "epoch": 0.2331945480795977, "grad_norm": 1.5747802257537842, "learning_rate": 8.955961610238218e-06, "loss": 0.8322, "step": 6602 }, { "epoch": 0.2332298698833056, "grad_norm": 1.8837082386016846, "learning_rate": 8.955611761745902e-06, "loss": 0.8246, "step": 6603 }, { "epoch": 0.23326519168701348, "grad_norm": 1.971069574356079, "learning_rate": 8.955261861483282e-06, "loss": 0.831, "step": 6604 }, { "epoch": 0.2333005134907214, "grad_norm": 2.0405023097991943, "learning_rate": 8.954911909454938e-06, "loss": 0.8659, "step": 6605 }, { "epoch": 0.2333358352944293, "grad_norm": 2.0472564697265625, "learning_rate": 8.954561905665453e-06, "loss": 0.8098, "step": 6606 }, { "epoch": 0.23337115709813722, "grad_norm": 1.9957927465438843, "learning_rate": 8.954211850119401e-06, "loss": 0.8691, "step": 6607 }, { "epoch": 0.23340647890184513, "grad_norm": 1.0722936391830444, "learning_rate": 8.95386174282137e-06, "loss": 0.5963, "step": 6608 }, { "epoch": 0.23344180070555304, "grad_norm": 2.030400276184082, "learning_rate": 8.95351158377594e-06, "loss": 0.8407, "step": 6609 }, { "epoch": 0.23347712250926095, "grad_norm": 1.8299485445022583, "learning_rate": 8.953161372987693e-06, "loss": 0.8324, "step": 6610 }, { "epoch": 0.23351244431296883, "grad_norm": 1.6753578186035156, "learning_rate": 8.952811110461214e-06, "loss": 0.8544, "step": 6611 }, { "epoch": 0.23354776611667674, "grad_norm": 1.7998156547546387, "learning_rate": 8.952460796201086e-06, "loss": 0.8585, "step": 6612 }, { "epoch": 0.23358308792038465, "grad_norm": 2.174726724624634, "learning_rate": 8.952110430211894e-06, "loss": 0.8592, "step": 6613 }, { "epoch": 0.23361840972409256, "grad_norm": 2.122607469558716, "learning_rate": 8.951760012498222e-06, "loss": 0.8965, "step": 6614 }, { "epoch": 0.23365373152780047, "grad_norm": 1.896276593208313, "learning_rate": 8.951409543064659e-06, "loss": 0.8203, "step": 6615 }, { "epoch": 0.23368905333150838, "grad_norm": 1.8113914728164673, "learning_rate": 8.951059021915791e-06, "loss": 0.8139, "step": 6616 }, { "epoch": 0.23372437513521627, "grad_norm": 1.736128807067871, "learning_rate": 8.950708449056205e-06, "loss": 0.8581, "step": 6617 }, { "epoch": 0.23375969693892418, "grad_norm": 1.9010735750198364, "learning_rate": 8.950357824490489e-06, "loss": 0.8522, "step": 6618 }, { "epoch": 0.2337950187426321, "grad_norm": 1.7074476480484009, "learning_rate": 8.950007148223232e-06, "loss": 0.851, "step": 6619 }, { "epoch": 0.23383034054634, "grad_norm": 1.7780721187591553, "learning_rate": 8.949656420259025e-06, "loss": 0.8528, "step": 6620 }, { "epoch": 0.2338656623500479, "grad_norm": 1.9624131917953491, "learning_rate": 8.949305640602457e-06, "loss": 0.8163, "step": 6621 }, { "epoch": 0.23390098415375582, "grad_norm": 1.8337535858154297, "learning_rate": 8.94895480925812e-06, "loss": 0.8391, "step": 6622 }, { "epoch": 0.23393630595746373, "grad_norm": 2.451010227203369, "learning_rate": 8.948603926230603e-06, "loss": 0.8367, "step": 6623 }, { "epoch": 0.2339716277611716, "grad_norm": 1.6672104597091675, "learning_rate": 8.9482529915245e-06, "loss": 0.8565, "step": 6624 }, { "epoch": 0.23400694956487952, "grad_norm": 1.6486250162124634, "learning_rate": 8.947902005144407e-06, "loss": 0.8476, "step": 6625 }, { "epoch": 0.23404227136858743, "grad_norm": 1.8482978343963623, "learning_rate": 8.947550967094911e-06, "loss": 0.8599, "step": 6626 }, { "epoch": 0.23407759317229535, "grad_norm": 1.9850187301635742, "learning_rate": 8.947199877380612e-06, "loss": 0.8654, "step": 6627 }, { "epoch": 0.23411291497600326, "grad_norm": 1.7100145816802979, "learning_rate": 8.946848736006103e-06, "loss": 0.8751, "step": 6628 }, { "epoch": 0.23414823677971117, "grad_norm": 1.7766368389129639, "learning_rate": 8.94649754297598e-06, "loss": 0.8802, "step": 6629 }, { "epoch": 0.23418355858341905, "grad_norm": 1.7211509943008423, "learning_rate": 8.946146298294838e-06, "loss": 0.8619, "step": 6630 }, { "epoch": 0.23421888038712696, "grad_norm": 1.8219659328460693, "learning_rate": 8.945795001967274e-06, "loss": 0.8777, "step": 6631 }, { "epoch": 0.23425420219083487, "grad_norm": 1.7724106311798096, "learning_rate": 8.94544365399789e-06, "loss": 0.8641, "step": 6632 }, { "epoch": 0.23428952399454278, "grad_norm": 2.2002930641174316, "learning_rate": 8.945092254391279e-06, "loss": 0.8694, "step": 6633 }, { "epoch": 0.2343248457982507, "grad_norm": 1.6161974668502808, "learning_rate": 8.944740803152042e-06, "loss": 0.8356, "step": 6634 }, { "epoch": 0.2343601676019586, "grad_norm": 1.6897636651992798, "learning_rate": 8.94438930028478e-06, "loss": 0.8204, "step": 6635 }, { "epoch": 0.2343954894056665, "grad_norm": 1.1803834438323975, "learning_rate": 8.944037745794091e-06, "loss": 0.5939, "step": 6636 }, { "epoch": 0.2344308112093744, "grad_norm": 1.5957504510879517, "learning_rate": 8.943686139684578e-06, "loss": 0.8149, "step": 6637 }, { "epoch": 0.2344661330130823, "grad_norm": 1.5407335758209229, "learning_rate": 8.943334481960842e-06, "loss": 0.8204, "step": 6638 }, { "epoch": 0.23450145481679022, "grad_norm": 1.622206211090088, "learning_rate": 8.942982772627486e-06, "loss": 0.8629, "step": 6639 }, { "epoch": 0.23453677662049813, "grad_norm": 2.091472864151001, "learning_rate": 8.942631011689112e-06, "loss": 0.8371, "step": 6640 }, { "epoch": 0.23457209842420604, "grad_norm": 1.8598772287368774, "learning_rate": 8.942279199150326e-06, "loss": 0.8653, "step": 6641 }, { "epoch": 0.23460742022791395, "grad_norm": 1.6905841827392578, "learning_rate": 8.941927335015729e-06, "loss": 0.855, "step": 6642 }, { "epoch": 0.23464274203162183, "grad_norm": 2.711592197418213, "learning_rate": 8.94157541928993e-06, "loss": 0.8631, "step": 6643 }, { "epoch": 0.23467806383532974, "grad_norm": 1.6225605010986328, "learning_rate": 8.941223451977529e-06, "loss": 0.8668, "step": 6644 }, { "epoch": 0.23471338563903765, "grad_norm": 1.0569604635238647, "learning_rate": 8.94087143308314e-06, "loss": 0.6333, "step": 6645 }, { "epoch": 0.23474870744274556, "grad_norm": 1.5861783027648926, "learning_rate": 8.940519362611366e-06, "loss": 0.8333, "step": 6646 }, { "epoch": 0.23478402924645347, "grad_norm": 1.8795273303985596, "learning_rate": 8.940167240566814e-06, "loss": 0.8857, "step": 6647 }, { "epoch": 0.23481935105016138, "grad_norm": 1.8322490453720093, "learning_rate": 8.939815066954095e-06, "loss": 0.8356, "step": 6648 }, { "epoch": 0.2348546728538693, "grad_norm": 1.712976098060608, "learning_rate": 8.939462841777817e-06, "loss": 0.8452, "step": 6649 }, { "epoch": 0.23488999465757718, "grad_norm": 1.7138502597808838, "learning_rate": 8.939110565042589e-06, "loss": 0.8385, "step": 6650 }, { "epoch": 0.2349253164612851, "grad_norm": 1.6580827236175537, "learning_rate": 8.938758236753022e-06, "loss": 0.8386, "step": 6651 }, { "epoch": 0.234960638264993, "grad_norm": 1.918683409690857, "learning_rate": 8.93840585691373e-06, "loss": 0.8253, "step": 6652 }, { "epoch": 0.2349959600687009, "grad_norm": 1.6588431596755981, "learning_rate": 8.93805342552932e-06, "loss": 0.8341, "step": 6653 }, { "epoch": 0.23503128187240882, "grad_norm": 1.8977079391479492, "learning_rate": 8.937700942604407e-06, "loss": 0.8663, "step": 6654 }, { "epoch": 0.23506660367611673, "grad_norm": 1.8983722925186157, "learning_rate": 8.937348408143606e-06, "loss": 0.8289, "step": 6655 }, { "epoch": 0.2351019254798246, "grad_norm": 1.70280122756958, "learning_rate": 8.936995822151528e-06, "loss": 0.8306, "step": 6656 }, { "epoch": 0.23513724728353252, "grad_norm": 1.667015790939331, "learning_rate": 8.936643184632789e-06, "loss": 0.8321, "step": 6657 }, { "epoch": 0.23517256908724044, "grad_norm": 1.6985403299331665, "learning_rate": 8.936290495592007e-06, "loss": 0.8711, "step": 6658 }, { "epoch": 0.23520789089094835, "grad_norm": 1.63823401927948, "learning_rate": 8.93593775503379e-06, "loss": 0.8473, "step": 6659 }, { "epoch": 0.23524321269465626, "grad_norm": 1.8019068241119385, "learning_rate": 8.935584962962763e-06, "loss": 0.8254, "step": 6660 }, { "epoch": 0.23527853449836417, "grad_norm": 1.6123912334442139, "learning_rate": 8.935232119383537e-06, "loss": 0.824, "step": 6661 }, { "epoch": 0.23531385630207208, "grad_norm": 1.9152275323867798, "learning_rate": 8.934879224300734e-06, "loss": 0.8715, "step": 6662 }, { "epoch": 0.23534917810577996, "grad_norm": 1.9373265504837036, "learning_rate": 8.934526277718973e-06, "loss": 0.8788, "step": 6663 }, { "epoch": 0.23538449990948787, "grad_norm": 1.870320439338684, "learning_rate": 8.934173279642868e-06, "loss": 0.8809, "step": 6664 }, { "epoch": 0.23541982171319578, "grad_norm": 1.789798617362976, "learning_rate": 8.933820230077046e-06, "loss": 0.8314, "step": 6665 }, { "epoch": 0.2354551435169037, "grad_norm": 2.354795217514038, "learning_rate": 8.933467129026123e-06, "loss": 0.8647, "step": 6666 }, { "epoch": 0.2354904653206116, "grad_norm": 1.6497074365615845, "learning_rate": 8.933113976494723e-06, "loss": 0.8002, "step": 6667 }, { "epoch": 0.2355257871243195, "grad_norm": 1.836570143699646, "learning_rate": 8.932760772487466e-06, "loss": 0.843, "step": 6668 }, { "epoch": 0.2355611089280274, "grad_norm": 1.7476226091384888, "learning_rate": 8.932407517008974e-06, "loss": 0.8256, "step": 6669 }, { "epoch": 0.2355964307317353, "grad_norm": 1.7866430282592773, "learning_rate": 8.932054210063872e-06, "loss": 0.891, "step": 6670 }, { "epoch": 0.23563175253544322, "grad_norm": 1.7163809537887573, "learning_rate": 8.931700851656784e-06, "loss": 0.8548, "step": 6671 }, { "epoch": 0.23566707433915113, "grad_norm": 1.6593050956726074, "learning_rate": 8.931347441792335e-06, "loss": 0.8442, "step": 6672 }, { "epoch": 0.23570239614285904, "grad_norm": 1.7271418571472168, "learning_rate": 8.930993980475149e-06, "loss": 0.8574, "step": 6673 }, { "epoch": 0.23573771794656695, "grad_norm": 1.7851972579956055, "learning_rate": 8.930640467709852e-06, "loss": 0.8579, "step": 6674 }, { "epoch": 0.23577303975027486, "grad_norm": 1.6729575395584106, "learning_rate": 8.930286903501073e-06, "loss": 0.8441, "step": 6675 }, { "epoch": 0.23580836155398274, "grad_norm": 1.6208529472351074, "learning_rate": 8.929933287853437e-06, "loss": 0.8314, "step": 6676 }, { "epoch": 0.23584368335769065, "grad_norm": 1.7217484712600708, "learning_rate": 8.929579620771573e-06, "loss": 0.8209, "step": 6677 }, { "epoch": 0.23587900516139856, "grad_norm": 1.5701106786727905, "learning_rate": 8.92922590226011e-06, "loss": 0.8599, "step": 6678 }, { "epoch": 0.23591432696510647, "grad_norm": 1.5639533996582031, "learning_rate": 8.928872132323675e-06, "loss": 0.8302, "step": 6679 }, { "epoch": 0.23594964876881439, "grad_norm": 1.634222149848938, "learning_rate": 8.928518310966903e-06, "loss": 0.8118, "step": 6680 }, { "epoch": 0.2359849705725223, "grad_norm": 1.5928789377212524, "learning_rate": 8.92816443819442e-06, "loss": 0.8707, "step": 6681 }, { "epoch": 0.23602029237623018, "grad_norm": 1.7999027967453003, "learning_rate": 8.92781051401086e-06, "loss": 0.8055, "step": 6682 }, { "epoch": 0.2360556141799381, "grad_norm": 1.5377185344696045, "learning_rate": 8.927456538420855e-06, "loss": 0.8508, "step": 6683 }, { "epoch": 0.236090935983646, "grad_norm": 1.7647449970245361, "learning_rate": 8.927102511429036e-06, "loss": 0.8428, "step": 6684 }, { "epoch": 0.2361262577873539, "grad_norm": 1.9097648859024048, "learning_rate": 8.926748433040038e-06, "loss": 0.8729, "step": 6685 }, { "epoch": 0.23616157959106182, "grad_norm": 2.2241876125335693, "learning_rate": 8.926394303258495e-06, "loss": 0.8639, "step": 6686 }, { "epoch": 0.23619690139476973, "grad_norm": 1.824649691581726, "learning_rate": 8.926040122089039e-06, "loss": 0.8281, "step": 6687 }, { "epoch": 0.23623222319847764, "grad_norm": 1.63150954246521, "learning_rate": 8.92568588953631e-06, "loss": 0.8464, "step": 6688 }, { "epoch": 0.23626754500218553, "grad_norm": 1.7467539310455322, "learning_rate": 8.925331605604943e-06, "loss": 0.8423, "step": 6689 }, { "epoch": 0.23630286680589344, "grad_norm": 1.6396160125732422, "learning_rate": 8.924977270299571e-06, "loss": 0.8286, "step": 6690 }, { "epoch": 0.23633818860960135, "grad_norm": 2.02685809135437, "learning_rate": 8.924622883624836e-06, "loss": 0.835, "step": 6691 }, { "epoch": 0.23637351041330926, "grad_norm": 1.712904453277588, "learning_rate": 8.924268445585376e-06, "loss": 0.8733, "step": 6692 }, { "epoch": 0.23640883221701717, "grad_norm": 1.6790173053741455, "learning_rate": 8.923913956185826e-06, "loss": 0.8691, "step": 6693 }, { "epoch": 0.23644415402072508, "grad_norm": 1.7998610734939575, "learning_rate": 8.923559415430827e-06, "loss": 0.8431, "step": 6694 }, { "epoch": 0.23647947582443296, "grad_norm": 1.9063090085983276, "learning_rate": 8.923204823325022e-06, "loss": 0.8597, "step": 6695 }, { "epoch": 0.23651479762814087, "grad_norm": 1.5204623937606812, "learning_rate": 8.922850179873049e-06, "loss": 0.8533, "step": 6696 }, { "epoch": 0.23655011943184878, "grad_norm": 1.8265902996063232, "learning_rate": 8.92249548507955e-06, "loss": 0.8235, "step": 6697 }, { "epoch": 0.2365854412355567, "grad_norm": 1.5720641613006592, "learning_rate": 8.922140738949168e-06, "loss": 0.8313, "step": 6698 }, { "epoch": 0.2366207630392646, "grad_norm": 2.003528356552124, "learning_rate": 8.921785941486545e-06, "loss": 0.8233, "step": 6699 }, { "epoch": 0.23665608484297251, "grad_norm": 1.8520232439041138, "learning_rate": 8.921431092696325e-06, "loss": 0.8794, "step": 6700 }, { "epoch": 0.23669140664668042, "grad_norm": 1.945410132408142, "learning_rate": 8.921076192583152e-06, "loss": 0.851, "step": 6701 }, { "epoch": 0.2367267284503883, "grad_norm": 1.8166754245758057, "learning_rate": 8.92072124115167e-06, "loss": 0.8592, "step": 6702 }, { "epoch": 0.23676205025409622, "grad_norm": 1.9682023525238037, "learning_rate": 8.920366238406526e-06, "loss": 0.8705, "step": 6703 }, { "epoch": 0.23679737205780413, "grad_norm": 1.4695755243301392, "learning_rate": 8.920011184352367e-06, "loss": 0.8133, "step": 6704 }, { "epoch": 0.23683269386151204, "grad_norm": 1.4889804124832153, "learning_rate": 8.919656078993837e-06, "loss": 0.8308, "step": 6705 }, { "epoch": 0.23686801566521995, "grad_norm": 1.7691515684127808, "learning_rate": 8.919300922335585e-06, "loss": 0.8642, "step": 6706 }, { "epoch": 0.23690333746892786, "grad_norm": 1.851619839668274, "learning_rate": 8.918945714382258e-06, "loss": 0.8362, "step": 6707 }, { "epoch": 0.23693865927263577, "grad_norm": 1.6566704511642456, "learning_rate": 8.91859045513851e-06, "loss": 0.8157, "step": 6708 }, { "epoch": 0.23697398107634365, "grad_norm": 1.6615164279937744, "learning_rate": 8.918235144608985e-06, "loss": 0.8722, "step": 6709 }, { "epoch": 0.23700930288005156, "grad_norm": 1.5883768796920776, "learning_rate": 8.917879782798333e-06, "loss": 0.88, "step": 6710 }, { "epoch": 0.23704462468375948, "grad_norm": 1.7522954940795898, "learning_rate": 8.917524369711207e-06, "loss": 0.8382, "step": 6711 }, { "epoch": 0.23707994648746739, "grad_norm": 1.6613322496414185, "learning_rate": 8.91716890535226e-06, "loss": 0.8666, "step": 6712 }, { "epoch": 0.2371152682911753, "grad_norm": 1.7214181423187256, "learning_rate": 8.916813389726144e-06, "loss": 0.8535, "step": 6713 }, { "epoch": 0.2371505900948832, "grad_norm": 1.6287087202072144, "learning_rate": 8.91645782283751e-06, "loss": 0.8364, "step": 6714 }, { "epoch": 0.2371859118985911, "grad_norm": 1.6626307964324951, "learning_rate": 8.91610220469101e-06, "loss": 0.8663, "step": 6715 }, { "epoch": 0.237221233702299, "grad_norm": 1.5070806741714478, "learning_rate": 8.915746535291301e-06, "loss": 0.831, "step": 6716 }, { "epoch": 0.2372565555060069, "grad_norm": 1.5031898021697998, "learning_rate": 8.91539081464304e-06, "loss": 0.8208, "step": 6717 }, { "epoch": 0.23729187730971482, "grad_norm": 1.8674041032791138, "learning_rate": 8.915035042750879e-06, "loss": 0.8414, "step": 6718 }, { "epoch": 0.23732719911342273, "grad_norm": 2.284252405166626, "learning_rate": 8.914679219619474e-06, "loss": 0.869, "step": 6719 }, { "epoch": 0.23736252091713064, "grad_norm": 1.5626397132873535, "learning_rate": 8.914323345253484e-06, "loss": 0.8088, "step": 6720 }, { "epoch": 0.23739784272083855, "grad_norm": 1.6620439291000366, "learning_rate": 8.913967419657565e-06, "loss": 0.8326, "step": 6721 }, { "epoch": 0.23743316452454644, "grad_norm": 1.6122764348983765, "learning_rate": 8.913611442836376e-06, "loss": 0.8317, "step": 6722 }, { "epoch": 0.23746848632825435, "grad_norm": 1.5584290027618408, "learning_rate": 8.913255414794577e-06, "loss": 0.8421, "step": 6723 }, { "epoch": 0.23750380813196226, "grad_norm": 1.8733787536621094, "learning_rate": 8.912899335536828e-06, "loss": 0.8427, "step": 6724 }, { "epoch": 0.23753912993567017, "grad_norm": 1.6660237312316895, "learning_rate": 8.912543205067787e-06, "loss": 0.8422, "step": 6725 }, { "epoch": 0.23757445173937808, "grad_norm": 1.562203049659729, "learning_rate": 8.912187023392116e-06, "loss": 0.8142, "step": 6726 }, { "epoch": 0.237609773543086, "grad_norm": 1.841497778892517, "learning_rate": 8.911830790514476e-06, "loss": 0.882, "step": 6727 }, { "epoch": 0.23764509534679387, "grad_norm": 1.5917603969573975, "learning_rate": 8.91147450643953e-06, "loss": 0.8321, "step": 6728 }, { "epoch": 0.23768041715050178, "grad_norm": 1.884885311126709, "learning_rate": 8.911118171171943e-06, "loss": 0.8619, "step": 6729 }, { "epoch": 0.2377157389542097, "grad_norm": 2.082080125808716, "learning_rate": 8.910761784716374e-06, "loss": 0.8802, "step": 6730 }, { "epoch": 0.2377510607579176, "grad_norm": 1.5882952213287354, "learning_rate": 8.91040534707749e-06, "loss": 0.8338, "step": 6731 }, { "epoch": 0.23778638256162551, "grad_norm": 1.5331506729125977, "learning_rate": 8.910048858259956e-06, "loss": 0.8344, "step": 6732 }, { "epoch": 0.23782170436533343, "grad_norm": 1.7896162271499634, "learning_rate": 8.909692318268439e-06, "loss": 0.8478, "step": 6733 }, { "epoch": 0.23785702616904134, "grad_norm": 1.8999778032302856, "learning_rate": 8.909335727107604e-06, "loss": 0.8716, "step": 6734 }, { "epoch": 0.23789234797274922, "grad_norm": 1.7685656547546387, "learning_rate": 8.908979084782116e-06, "loss": 0.8568, "step": 6735 }, { "epoch": 0.23792766977645713, "grad_norm": 1.6998275518417358, "learning_rate": 8.908622391296646e-06, "loss": 0.8538, "step": 6736 }, { "epoch": 0.23796299158016504, "grad_norm": 1.0689785480499268, "learning_rate": 8.90826564665586e-06, "loss": 0.5804, "step": 6737 }, { "epoch": 0.23799831338387295, "grad_norm": 1.7274529933929443, "learning_rate": 8.907908850864428e-06, "loss": 0.855, "step": 6738 }, { "epoch": 0.23803363518758086, "grad_norm": 1.7144979238510132, "learning_rate": 8.90755200392702e-06, "loss": 0.8494, "step": 6739 }, { "epoch": 0.23806895699128877, "grad_norm": 1.5353176593780518, "learning_rate": 8.907195105848308e-06, "loss": 0.8572, "step": 6740 }, { "epoch": 0.23810427879499665, "grad_norm": 1.6737775802612305, "learning_rate": 8.906838156632958e-06, "loss": 0.8453, "step": 6741 }, { "epoch": 0.23813960059870456, "grad_norm": 1.9369577169418335, "learning_rate": 8.906481156285644e-06, "loss": 0.848, "step": 6742 }, { "epoch": 0.23817492240241248, "grad_norm": 1.581600546836853, "learning_rate": 8.906124104811042e-06, "loss": 0.8299, "step": 6743 }, { "epoch": 0.2382102442061204, "grad_norm": 1.7571099996566772, "learning_rate": 8.90576700221382e-06, "loss": 0.8116, "step": 6744 }, { "epoch": 0.2382455660098283, "grad_norm": 1.8489267826080322, "learning_rate": 8.905409848498654e-06, "loss": 0.8576, "step": 6745 }, { "epoch": 0.2382808878135362, "grad_norm": 1.527623176574707, "learning_rate": 8.905052643670217e-06, "loss": 0.7924, "step": 6746 }, { "epoch": 0.23831620961724412, "grad_norm": 1.5036484003067017, "learning_rate": 8.904695387733187e-06, "loss": 0.8116, "step": 6747 }, { "epoch": 0.238351531420952, "grad_norm": 1.6958558559417725, "learning_rate": 8.904338080692236e-06, "loss": 0.8174, "step": 6748 }, { "epoch": 0.2383868532246599, "grad_norm": 1.5645368099212646, "learning_rate": 8.903980722552044e-06, "loss": 0.818, "step": 6749 }, { "epoch": 0.23842217502836782, "grad_norm": 1.6239230632781982, "learning_rate": 8.903623313317286e-06, "loss": 0.8682, "step": 6750 }, { "epoch": 0.23845749683207573, "grad_norm": 1.9584828615188599, "learning_rate": 8.903265852992638e-06, "loss": 0.8229, "step": 6751 }, { "epoch": 0.23849281863578364, "grad_norm": 1.7759740352630615, "learning_rate": 8.902908341582781e-06, "loss": 0.8353, "step": 6752 }, { "epoch": 0.23852814043949155, "grad_norm": 1.7906532287597656, "learning_rate": 8.902550779092394e-06, "loss": 0.8336, "step": 6753 }, { "epoch": 0.23856346224319944, "grad_norm": 1.1567277908325195, "learning_rate": 8.902193165526155e-06, "loss": 0.6247, "step": 6754 }, { "epoch": 0.23859878404690735, "grad_norm": 1.5687137842178345, "learning_rate": 8.901835500888746e-06, "loss": 0.8807, "step": 6755 }, { "epoch": 0.23863410585061526, "grad_norm": 1.656386375427246, "learning_rate": 8.90147778518485e-06, "loss": 0.863, "step": 6756 }, { "epoch": 0.23866942765432317, "grad_norm": 1.648451805114746, "learning_rate": 8.901120018419141e-06, "loss": 0.8454, "step": 6757 }, { "epoch": 0.23870474945803108, "grad_norm": 1.733798861503601, "learning_rate": 8.90076220059631e-06, "loss": 0.8364, "step": 6758 }, { "epoch": 0.238740071261739, "grad_norm": 1.749839186668396, "learning_rate": 8.900404331721036e-06, "loss": 0.8635, "step": 6759 }, { "epoch": 0.2387753930654469, "grad_norm": 1.7448161840438843, "learning_rate": 8.900046411798002e-06, "loss": 0.8677, "step": 6760 }, { "epoch": 0.23881071486915478, "grad_norm": 1.9313114881515503, "learning_rate": 8.899688440831895e-06, "loss": 0.8707, "step": 6761 }, { "epoch": 0.2388460366728627, "grad_norm": 1.7127654552459717, "learning_rate": 8.8993304188274e-06, "loss": 0.825, "step": 6762 }, { "epoch": 0.2388813584765706, "grad_norm": 1.6583837270736694, "learning_rate": 8.898972345789199e-06, "loss": 0.8608, "step": 6763 }, { "epoch": 0.23891668028027852, "grad_norm": 1.6804195642471313, "learning_rate": 8.898614221721982e-06, "loss": 0.8391, "step": 6764 }, { "epoch": 0.23895200208398643, "grad_norm": 1.6543313264846802, "learning_rate": 8.898256046630433e-06, "loss": 0.833, "step": 6765 }, { "epoch": 0.23898732388769434, "grad_norm": 1.6635762453079224, "learning_rate": 8.897897820519244e-06, "loss": 0.8527, "step": 6766 }, { "epoch": 0.23902264569140222, "grad_norm": 1.776114821434021, "learning_rate": 8.8975395433931e-06, "loss": 0.8601, "step": 6767 }, { "epoch": 0.23905796749511013, "grad_norm": 1.7954176664352417, "learning_rate": 8.897181215256693e-06, "loss": 0.8222, "step": 6768 }, { "epoch": 0.23909328929881804, "grad_norm": 1.6679021120071411, "learning_rate": 8.896822836114709e-06, "loss": 0.8685, "step": 6769 }, { "epoch": 0.23912861110252595, "grad_norm": 1.5990238189697266, "learning_rate": 8.896464405971841e-06, "loss": 0.7903, "step": 6770 }, { "epoch": 0.23916393290623386, "grad_norm": 1.9580376148223877, "learning_rate": 8.89610592483278e-06, "loss": 0.8541, "step": 6771 }, { "epoch": 0.23919925470994177, "grad_norm": 1.8732788562774658, "learning_rate": 8.895747392702213e-06, "loss": 0.8417, "step": 6772 }, { "epoch": 0.23923457651364968, "grad_norm": 1.6160191297531128, "learning_rate": 8.895388809584841e-06, "loss": 0.8372, "step": 6773 }, { "epoch": 0.23926989831735757, "grad_norm": 1.6613959074020386, "learning_rate": 8.895030175485351e-06, "loss": 0.8571, "step": 6774 }, { "epoch": 0.23930522012106548, "grad_norm": 1.649854302406311, "learning_rate": 8.894671490408438e-06, "loss": 0.8074, "step": 6775 }, { "epoch": 0.2393405419247734, "grad_norm": 1.851822018623352, "learning_rate": 8.894312754358796e-06, "loss": 0.8326, "step": 6776 }, { "epoch": 0.2393758637284813, "grad_norm": 1.7554656267166138, "learning_rate": 8.893953967341122e-06, "loss": 0.8389, "step": 6777 }, { "epoch": 0.2394111855321892, "grad_norm": 1.7230476140975952, "learning_rate": 8.89359512936011e-06, "loss": 0.8662, "step": 6778 }, { "epoch": 0.23944650733589712, "grad_norm": 1.6121535301208496, "learning_rate": 8.893236240420458e-06, "loss": 0.8577, "step": 6779 }, { "epoch": 0.239481829139605, "grad_norm": 1.6541736125946045, "learning_rate": 8.892877300526859e-06, "loss": 0.8322, "step": 6780 }, { "epoch": 0.2395171509433129, "grad_norm": 1.6758604049682617, "learning_rate": 8.892518309684017e-06, "loss": 0.8466, "step": 6781 }, { "epoch": 0.23955247274702082, "grad_norm": 1.5893105268478394, "learning_rate": 8.892159267896625e-06, "loss": 0.8201, "step": 6782 }, { "epoch": 0.23958779455072873, "grad_norm": 1.807590365409851, "learning_rate": 8.891800175169385e-06, "loss": 0.8569, "step": 6783 }, { "epoch": 0.23962311635443664, "grad_norm": 2.0192830562591553, "learning_rate": 8.891441031506996e-06, "loss": 0.837, "step": 6784 }, { "epoch": 0.23965843815814455, "grad_norm": 1.9008396863937378, "learning_rate": 8.891081836914158e-06, "loss": 0.8274, "step": 6785 }, { "epoch": 0.23969375996185247, "grad_norm": 1.7035548686981201, "learning_rate": 8.890722591395571e-06, "loss": 0.8564, "step": 6786 }, { "epoch": 0.23972908176556035, "grad_norm": 1.7015691995620728, "learning_rate": 8.89036329495594e-06, "loss": 0.8468, "step": 6787 }, { "epoch": 0.23976440356926826, "grad_norm": 1.6596662998199463, "learning_rate": 8.890003947599964e-06, "loss": 0.8206, "step": 6788 }, { "epoch": 0.23979972537297617, "grad_norm": 1.736736536026001, "learning_rate": 8.889644549332348e-06, "loss": 0.8381, "step": 6789 }, { "epoch": 0.23983504717668408, "grad_norm": 1.8054722547531128, "learning_rate": 8.889285100157796e-06, "loss": 0.8811, "step": 6790 }, { "epoch": 0.239870368980392, "grad_norm": 1.7589329481124878, "learning_rate": 8.88892560008101e-06, "loss": 0.8436, "step": 6791 }, { "epoch": 0.2399056907840999, "grad_norm": 1.8746446371078491, "learning_rate": 8.8885660491067e-06, "loss": 0.8473, "step": 6792 }, { "epoch": 0.23994101258780778, "grad_norm": 1.7068476676940918, "learning_rate": 8.888206447239566e-06, "loss": 0.8224, "step": 6793 }, { "epoch": 0.2399763343915157, "grad_norm": 1.6081535816192627, "learning_rate": 8.887846794484318e-06, "loss": 0.8672, "step": 6794 }, { "epoch": 0.2400116561952236, "grad_norm": 1.6569913625717163, "learning_rate": 8.887487090845661e-06, "loss": 0.838, "step": 6795 }, { "epoch": 0.24004697799893152, "grad_norm": 1.955026626586914, "learning_rate": 8.887127336328305e-06, "loss": 0.8453, "step": 6796 }, { "epoch": 0.24008229980263943, "grad_norm": 1.7481944561004639, "learning_rate": 8.886767530936957e-06, "loss": 0.8432, "step": 6797 }, { "epoch": 0.24011762160634734, "grad_norm": 1.6313663721084595, "learning_rate": 8.886407674676324e-06, "loss": 0.8494, "step": 6798 }, { "epoch": 0.24015294341005525, "grad_norm": 1.6598812341690063, "learning_rate": 8.88604776755112e-06, "loss": 0.8589, "step": 6799 }, { "epoch": 0.24018826521376313, "grad_norm": 2.0796210765838623, "learning_rate": 8.885687809566056e-06, "loss": 0.8757, "step": 6800 }, { "epoch": 0.24022358701747104, "grad_norm": 1.8713198900222778, "learning_rate": 8.885327800725836e-06, "loss": 0.8868, "step": 6801 }, { "epoch": 0.24025890882117895, "grad_norm": 1.8978748321533203, "learning_rate": 8.884967741035177e-06, "loss": 0.8574, "step": 6802 }, { "epoch": 0.24029423062488686, "grad_norm": 1.9503912925720215, "learning_rate": 8.88460763049879e-06, "loss": 0.8472, "step": 6803 }, { "epoch": 0.24032955242859477, "grad_norm": 1.8610817193984985, "learning_rate": 8.88424746912139e-06, "loss": 0.8345, "step": 6804 }, { "epoch": 0.24036487423230268, "grad_norm": 1.6398911476135254, "learning_rate": 8.883887256907689e-06, "loss": 0.8377, "step": 6805 }, { "epoch": 0.24040019603601057, "grad_norm": 1.6096858978271484, "learning_rate": 8.883526993862402e-06, "loss": 0.8182, "step": 6806 }, { "epoch": 0.24043551783971848, "grad_norm": 2.0282416343688965, "learning_rate": 8.883166679990242e-06, "loss": 0.8902, "step": 6807 }, { "epoch": 0.2404708396434264, "grad_norm": 1.8090373277664185, "learning_rate": 8.882806315295928e-06, "loss": 0.8659, "step": 6808 }, { "epoch": 0.2405061614471343, "grad_norm": 1.7916628122329712, "learning_rate": 8.882445899784175e-06, "loss": 0.8332, "step": 6809 }, { "epoch": 0.2405414832508422, "grad_norm": 1.8547180891036987, "learning_rate": 8.882085433459698e-06, "loss": 0.8481, "step": 6810 }, { "epoch": 0.24057680505455012, "grad_norm": 1.7286388874053955, "learning_rate": 8.88172491632722e-06, "loss": 0.8438, "step": 6811 }, { "epoch": 0.24061212685825803, "grad_norm": 1.863616704940796, "learning_rate": 8.881364348391452e-06, "loss": 0.8062, "step": 6812 }, { "epoch": 0.2406474486619659, "grad_norm": 1.74275541305542, "learning_rate": 8.88100372965712e-06, "loss": 0.8435, "step": 6813 }, { "epoch": 0.24068277046567382, "grad_norm": 1.6971051692962646, "learning_rate": 8.88064306012894e-06, "loss": 0.8159, "step": 6814 }, { "epoch": 0.24071809226938173, "grad_norm": 1.9805699586868286, "learning_rate": 8.880282339811632e-06, "loss": 0.8065, "step": 6815 }, { "epoch": 0.24075341407308964, "grad_norm": 2.0794517993927, "learning_rate": 8.879921568709918e-06, "loss": 0.8485, "step": 6816 }, { "epoch": 0.24078873587679755, "grad_norm": 1.6717650890350342, "learning_rate": 8.879560746828522e-06, "loss": 0.8601, "step": 6817 }, { "epoch": 0.24082405768050547, "grad_norm": 1.9743810892105103, "learning_rate": 8.879199874172163e-06, "loss": 0.8378, "step": 6818 }, { "epoch": 0.24085937948421335, "grad_norm": 1.8821526765823364, "learning_rate": 8.878838950745565e-06, "loss": 0.865, "step": 6819 }, { "epoch": 0.24089470128792126, "grad_norm": 1.920164942741394, "learning_rate": 8.878477976553452e-06, "loss": 0.8453, "step": 6820 }, { "epoch": 0.24093002309162917, "grad_norm": 1.7632495164871216, "learning_rate": 8.878116951600548e-06, "loss": 0.8446, "step": 6821 }, { "epoch": 0.24096534489533708, "grad_norm": 1.9518117904663086, "learning_rate": 8.87775587589158e-06, "loss": 0.8381, "step": 6822 }, { "epoch": 0.241000666699045, "grad_norm": 1.630745530128479, "learning_rate": 8.87739474943127e-06, "loss": 0.8333, "step": 6823 }, { "epoch": 0.2410359885027529, "grad_norm": 1.8635590076446533, "learning_rate": 8.877033572224348e-06, "loss": 0.8596, "step": 6824 }, { "epoch": 0.2410713103064608, "grad_norm": 1.9007612466812134, "learning_rate": 8.87667234427554e-06, "loss": 0.8424, "step": 6825 }, { "epoch": 0.2411066321101687, "grad_norm": 1.870072603225708, "learning_rate": 8.87631106558957e-06, "loss": 0.864, "step": 6826 }, { "epoch": 0.2411419539138766, "grad_norm": 1.6690679788589478, "learning_rate": 8.875949736171171e-06, "loss": 0.8276, "step": 6827 }, { "epoch": 0.24117727571758452, "grad_norm": 1.7784136533737183, "learning_rate": 8.87558835602507e-06, "loss": 0.8384, "step": 6828 }, { "epoch": 0.24121259752129243, "grad_norm": 1.686598300933838, "learning_rate": 8.875226925156e-06, "loss": 0.8291, "step": 6829 }, { "epoch": 0.24124791932500034, "grad_norm": 1.8640739917755127, "learning_rate": 8.874865443568686e-06, "loss": 0.7993, "step": 6830 }, { "epoch": 0.24128324112870825, "grad_norm": 1.718154788017273, "learning_rate": 8.87450391126786e-06, "loss": 0.811, "step": 6831 }, { "epoch": 0.24131856293241613, "grad_norm": 1.747902750968933, "learning_rate": 8.874142328258257e-06, "loss": 0.8198, "step": 6832 }, { "epoch": 0.24135388473612404, "grad_norm": 1.693331241607666, "learning_rate": 8.873780694544608e-06, "loss": 0.8475, "step": 6833 }, { "epoch": 0.24138920653983195, "grad_norm": 1.6999295949935913, "learning_rate": 8.873419010131645e-06, "loss": 0.8157, "step": 6834 }, { "epoch": 0.24142452834353986, "grad_norm": 1.7944539785385132, "learning_rate": 8.873057275024102e-06, "loss": 0.8626, "step": 6835 }, { "epoch": 0.24145985014724777, "grad_norm": 1.6964322328567505, "learning_rate": 8.872695489226713e-06, "loss": 0.8528, "step": 6836 }, { "epoch": 0.24149517195095568, "grad_norm": 1.8136130571365356, "learning_rate": 8.872333652744214e-06, "loss": 0.8685, "step": 6837 }, { "epoch": 0.2415304937546636, "grad_norm": 1.7188504934310913, "learning_rate": 8.871971765581339e-06, "loss": 0.8762, "step": 6838 }, { "epoch": 0.24156581555837148, "grad_norm": 1.8811347484588623, "learning_rate": 8.871609827742827e-06, "loss": 0.884, "step": 6839 }, { "epoch": 0.2416011373620794, "grad_norm": 1.8162893056869507, "learning_rate": 8.871247839233413e-06, "loss": 0.8301, "step": 6840 }, { "epoch": 0.2416364591657873, "grad_norm": 1.8465347290039062, "learning_rate": 8.870885800057834e-06, "loss": 0.8569, "step": 6841 }, { "epoch": 0.2416717809694952, "grad_norm": 1.652673602104187, "learning_rate": 8.87052371022083e-06, "loss": 0.8394, "step": 6842 }, { "epoch": 0.24170710277320312, "grad_norm": 1.7311091423034668, "learning_rate": 8.870161569727139e-06, "loss": 0.8545, "step": 6843 }, { "epoch": 0.24174242457691103, "grad_norm": 1.8392857313156128, "learning_rate": 8.869799378581501e-06, "loss": 0.8487, "step": 6844 }, { "epoch": 0.2417777463806189, "grad_norm": 1.114159107208252, "learning_rate": 8.869437136788656e-06, "loss": 0.6003, "step": 6845 }, { "epoch": 0.24181306818432682, "grad_norm": 1.7765355110168457, "learning_rate": 8.869074844353345e-06, "loss": 0.8248, "step": 6846 }, { "epoch": 0.24184838998803473, "grad_norm": 1.6334948539733887, "learning_rate": 8.86871250128031e-06, "loss": 0.8444, "step": 6847 }, { "epoch": 0.24188371179174264, "grad_norm": 1.5496094226837158, "learning_rate": 8.868350107574293e-06, "loss": 0.8438, "step": 6848 }, { "epoch": 0.24191903359545056, "grad_norm": 1.59751558303833, "learning_rate": 8.867987663240037e-06, "loss": 0.8747, "step": 6849 }, { "epoch": 0.24195435539915847, "grad_norm": 1.599867820739746, "learning_rate": 8.867625168282285e-06, "loss": 0.8345, "step": 6850 }, { "epoch": 0.24198967720286638, "grad_norm": 1.8177622556686401, "learning_rate": 8.867262622705783e-06, "loss": 0.7953, "step": 6851 }, { "epoch": 0.24202499900657426, "grad_norm": 1.7383830547332764, "learning_rate": 8.866900026515275e-06, "loss": 0.8558, "step": 6852 }, { "epoch": 0.24206032081028217, "grad_norm": 1.960914969444275, "learning_rate": 8.866537379715506e-06, "loss": 0.838, "step": 6853 }, { "epoch": 0.24209564261399008, "grad_norm": 1.6340913772583008, "learning_rate": 8.866174682311221e-06, "loss": 0.8154, "step": 6854 }, { "epoch": 0.242130964417698, "grad_norm": 1.6305806636810303, "learning_rate": 8.86581193430717e-06, "loss": 0.7836, "step": 6855 }, { "epoch": 0.2421662862214059, "grad_norm": 1.8191883563995361, "learning_rate": 8.865449135708099e-06, "loss": 0.8596, "step": 6856 }, { "epoch": 0.2422016080251138, "grad_norm": 1.7576823234558105, "learning_rate": 8.865086286518756e-06, "loss": 0.8646, "step": 6857 }, { "epoch": 0.2422369298288217, "grad_norm": 1.8646750450134277, "learning_rate": 8.86472338674389e-06, "loss": 0.8378, "step": 6858 }, { "epoch": 0.2422722516325296, "grad_norm": 2.07724928855896, "learning_rate": 8.864360436388252e-06, "loss": 0.8384, "step": 6859 }, { "epoch": 0.24230757343623752, "grad_norm": 1.882780909538269, "learning_rate": 8.863997435456591e-06, "loss": 0.8413, "step": 6860 }, { "epoch": 0.24234289523994543, "grad_norm": 1.6133750677108765, "learning_rate": 8.863634383953656e-06, "loss": 0.8204, "step": 6861 }, { "epoch": 0.24237821704365334, "grad_norm": 1.5662851333618164, "learning_rate": 8.863271281884202e-06, "loss": 0.8186, "step": 6862 }, { "epoch": 0.24241353884736125, "grad_norm": 1.5770694017410278, "learning_rate": 8.862908129252979e-06, "loss": 0.8333, "step": 6863 }, { "epoch": 0.24244886065106916, "grad_norm": 1.731255292892456, "learning_rate": 8.86254492606474e-06, "loss": 0.8392, "step": 6864 }, { "epoch": 0.24248418245477704, "grad_norm": 1.6426938772201538, "learning_rate": 8.862181672324241e-06, "loss": 0.8305, "step": 6865 }, { "epoch": 0.24251950425848495, "grad_norm": 1.6897553205490112, "learning_rate": 8.861818368036232e-06, "loss": 0.8252, "step": 6866 }, { "epoch": 0.24255482606219286, "grad_norm": 1.6297742128372192, "learning_rate": 8.861455013205472e-06, "loss": 0.8567, "step": 6867 }, { "epoch": 0.24259014786590077, "grad_norm": 1.813068151473999, "learning_rate": 8.861091607836714e-06, "loss": 0.8602, "step": 6868 }, { "epoch": 0.24262546966960868, "grad_norm": 2.1106975078582764, "learning_rate": 8.860728151934715e-06, "loss": 0.8576, "step": 6869 }, { "epoch": 0.2426607914733166, "grad_norm": 1.957768440246582, "learning_rate": 8.860364645504231e-06, "loss": 0.9174, "step": 6870 }, { "epoch": 0.24269611327702448, "grad_norm": 1.777199387550354, "learning_rate": 8.860001088550021e-06, "loss": 0.8401, "step": 6871 }, { "epoch": 0.2427314350807324, "grad_norm": 1.9307507276535034, "learning_rate": 8.859637481076842e-06, "loss": 0.8663, "step": 6872 }, { "epoch": 0.2427667568844403, "grad_norm": 1.7262333631515503, "learning_rate": 8.859273823089453e-06, "loss": 0.9067, "step": 6873 }, { "epoch": 0.2428020786881482, "grad_norm": 2.977607011795044, "learning_rate": 8.858910114592614e-06, "loss": 0.8411, "step": 6874 }, { "epoch": 0.24283740049185612, "grad_norm": 1.7913960218429565, "learning_rate": 8.858546355591085e-06, "loss": 0.848, "step": 6875 }, { "epoch": 0.24287272229556403, "grad_norm": 1.641993522644043, "learning_rate": 8.858182546089627e-06, "loss": 0.8381, "step": 6876 }, { "epoch": 0.24290804409927194, "grad_norm": 1.608649492263794, "learning_rate": 8.857818686092999e-06, "loss": 0.8279, "step": 6877 }, { "epoch": 0.24294336590297982, "grad_norm": 1.684005618095398, "learning_rate": 8.857454775605968e-06, "loss": 0.8351, "step": 6878 }, { "epoch": 0.24297868770668773, "grad_norm": 1.8255865573883057, "learning_rate": 8.857090814633291e-06, "loss": 0.8249, "step": 6879 }, { "epoch": 0.24301400951039565, "grad_norm": 2.095665454864502, "learning_rate": 8.856726803179736e-06, "loss": 0.8206, "step": 6880 }, { "epoch": 0.24304933131410356, "grad_norm": 2.145061492919922, "learning_rate": 8.856362741250067e-06, "loss": 0.8309, "step": 6881 }, { "epoch": 0.24308465311781147, "grad_norm": 1.6111074686050415, "learning_rate": 8.855998628849046e-06, "loss": 0.8474, "step": 6882 }, { "epoch": 0.24311997492151938, "grad_norm": 1.6164705753326416, "learning_rate": 8.85563446598144e-06, "loss": 0.8536, "step": 6883 }, { "epoch": 0.24315529672522726, "grad_norm": 2.0599114894866943, "learning_rate": 8.855270252652014e-06, "loss": 0.835, "step": 6884 }, { "epoch": 0.24319061852893517, "grad_norm": 1.8479483127593994, "learning_rate": 8.854905988865537e-06, "loss": 0.8665, "step": 6885 }, { "epoch": 0.24322594033264308, "grad_norm": 1.6249139308929443, "learning_rate": 8.854541674626774e-06, "loss": 0.8361, "step": 6886 }, { "epoch": 0.243261262136351, "grad_norm": 1.843906283378601, "learning_rate": 8.854177309940495e-06, "loss": 0.8593, "step": 6887 }, { "epoch": 0.2432965839400589, "grad_norm": 1.7476952075958252, "learning_rate": 8.853812894811468e-06, "loss": 0.8586, "step": 6888 }, { "epoch": 0.2433319057437668, "grad_norm": 1.8061500787734985, "learning_rate": 8.853448429244463e-06, "loss": 0.8249, "step": 6889 }, { "epoch": 0.24336722754747472, "grad_norm": 1.9038453102111816, "learning_rate": 8.853083913244247e-06, "loss": 0.8939, "step": 6890 }, { "epoch": 0.2434025493511826, "grad_norm": 1.9258129596710205, "learning_rate": 8.852719346815593e-06, "loss": 0.8578, "step": 6891 }, { "epoch": 0.24343787115489052, "grad_norm": 1.8074212074279785, "learning_rate": 8.852354729963274e-06, "loss": 0.849, "step": 6892 }, { "epoch": 0.24347319295859843, "grad_norm": 1.7757774591445923, "learning_rate": 8.85199006269206e-06, "loss": 0.8487, "step": 6893 }, { "epoch": 0.24350851476230634, "grad_norm": 1.8022067546844482, "learning_rate": 8.851625345006725e-06, "loss": 0.8162, "step": 6894 }, { "epoch": 0.24354383656601425, "grad_norm": 1.600148320198059, "learning_rate": 8.85126057691204e-06, "loss": 0.8425, "step": 6895 }, { "epoch": 0.24357915836972216, "grad_norm": 2.0380101203918457, "learning_rate": 8.85089575841278e-06, "loss": 0.8565, "step": 6896 }, { "epoch": 0.24361448017343004, "grad_norm": 1.642351746559143, "learning_rate": 8.850530889513723e-06, "loss": 0.8409, "step": 6897 }, { "epoch": 0.24364980197713795, "grad_norm": 1.4757347106933594, "learning_rate": 8.85016597021964e-06, "loss": 0.8367, "step": 6898 }, { "epoch": 0.24368512378084586, "grad_norm": 1.6443357467651367, "learning_rate": 8.849801000535307e-06, "loss": 0.8051, "step": 6899 }, { "epoch": 0.24372044558455377, "grad_norm": 1.7616506814956665, "learning_rate": 8.849435980465505e-06, "loss": 0.8414, "step": 6900 }, { "epoch": 0.24375576738826168, "grad_norm": 1.5904128551483154, "learning_rate": 8.849070910015008e-06, "loss": 0.8209, "step": 6901 }, { "epoch": 0.2437910891919696, "grad_norm": 2.0289533138275146, "learning_rate": 8.848705789188592e-06, "loss": 0.8253, "step": 6902 }, { "epoch": 0.2438264109956775, "grad_norm": 1.626037836074829, "learning_rate": 8.84834061799104e-06, "loss": 0.8141, "step": 6903 }, { "epoch": 0.2438617327993854, "grad_norm": 1.6096299886703491, "learning_rate": 8.847975396427129e-06, "loss": 0.8309, "step": 6904 }, { "epoch": 0.2438970546030933, "grad_norm": 1.7615444660186768, "learning_rate": 8.84761012450164e-06, "loss": 0.8349, "step": 6905 }, { "epoch": 0.2439323764068012, "grad_norm": 1.869512915611267, "learning_rate": 8.847244802219352e-06, "loss": 0.8786, "step": 6906 }, { "epoch": 0.24396769821050912, "grad_norm": 1.678458571434021, "learning_rate": 8.846879429585049e-06, "loss": 0.8148, "step": 6907 }, { "epoch": 0.24400302001421703, "grad_norm": 2.0048768520355225, "learning_rate": 8.846514006603509e-06, "loss": 0.8564, "step": 6908 }, { "epoch": 0.24403834181792494, "grad_norm": 1.8436174392700195, "learning_rate": 8.846148533279517e-06, "loss": 0.8149, "step": 6909 }, { "epoch": 0.24407366362163282, "grad_norm": 1.782973051071167, "learning_rate": 8.845783009617857e-06, "loss": 0.8305, "step": 6910 }, { "epoch": 0.24410898542534074, "grad_norm": 1.7001969814300537, "learning_rate": 8.845417435623312e-06, "loss": 0.8216, "step": 6911 }, { "epoch": 0.24414430722904865, "grad_norm": 1.873921513557434, "learning_rate": 8.845051811300666e-06, "loss": 0.8773, "step": 6912 }, { "epoch": 0.24417962903275656, "grad_norm": 1.7310534715652466, "learning_rate": 8.844686136654704e-06, "loss": 0.8718, "step": 6913 }, { "epoch": 0.24421495083646447, "grad_norm": 1.9785798788070679, "learning_rate": 8.844320411690214e-06, "loss": 0.8119, "step": 6914 }, { "epoch": 0.24425027264017238, "grad_norm": 1.713792085647583, "learning_rate": 8.84395463641198e-06, "loss": 0.8693, "step": 6915 }, { "epoch": 0.2442855944438803, "grad_norm": 2.0600485801696777, "learning_rate": 8.843588810824791e-06, "loss": 0.8789, "step": 6916 }, { "epoch": 0.24432091624758817, "grad_norm": 0.9939165711402893, "learning_rate": 8.843222934933433e-06, "loss": 0.6258, "step": 6917 }, { "epoch": 0.24435623805129608, "grad_norm": 1.678306221961975, "learning_rate": 8.842857008742697e-06, "loss": 0.8582, "step": 6918 }, { "epoch": 0.244391559855004, "grad_norm": 1.729104995727539, "learning_rate": 8.84249103225737e-06, "loss": 0.8548, "step": 6919 }, { "epoch": 0.2444268816587119, "grad_norm": 1.6242549419403076, "learning_rate": 8.842125005482243e-06, "loss": 0.8092, "step": 6920 }, { "epoch": 0.2444622034624198, "grad_norm": 1.8057245016098022, "learning_rate": 8.841758928422106e-06, "loss": 0.8469, "step": 6921 }, { "epoch": 0.24449752526612772, "grad_norm": 2.006007194519043, "learning_rate": 8.84139280108175e-06, "loss": 0.8719, "step": 6922 }, { "epoch": 0.24453284706983563, "grad_norm": 1.5782325267791748, "learning_rate": 8.841026623465968e-06, "loss": 0.8396, "step": 6923 }, { "epoch": 0.24456816887354352, "grad_norm": 1.6202692985534668, "learning_rate": 8.840660395579552e-06, "loss": 0.8328, "step": 6924 }, { "epoch": 0.24460349067725143, "grad_norm": 1.7311118841171265, "learning_rate": 8.840294117427294e-06, "loss": 0.8318, "step": 6925 }, { "epoch": 0.24463881248095934, "grad_norm": 1.634238362312317, "learning_rate": 8.839927789013988e-06, "loss": 0.8299, "step": 6926 }, { "epoch": 0.24467413428466725, "grad_norm": 1.64797043800354, "learning_rate": 8.839561410344428e-06, "loss": 0.8473, "step": 6927 }, { "epoch": 0.24470945608837516, "grad_norm": 1.8397696018218994, "learning_rate": 8.839194981423412e-06, "loss": 0.8163, "step": 6928 }, { "epoch": 0.24474477789208307, "grad_norm": 1.6247707605361938, "learning_rate": 8.838828502255734e-06, "loss": 0.8692, "step": 6929 }, { "epoch": 0.24478009969579095, "grad_norm": 1.7657289505004883, "learning_rate": 8.838461972846188e-06, "loss": 0.8454, "step": 6930 }, { "epoch": 0.24481542149949886, "grad_norm": 1.665801763534546, "learning_rate": 8.838095393199574e-06, "loss": 0.8556, "step": 6931 }, { "epoch": 0.24485074330320677, "grad_norm": 1.8408823013305664, "learning_rate": 8.83772876332069e-06, "loss": 0.8698, "step": 6932 }, { "epoch": 0.24488606510691469, "grad_norm": 1.7008546590805054, "learning_rate": 8.837362083214333e-06, "loss": 0.8733, "step": 6933 }, { "epoch": 0.2449213869106226, "grad_norm": 1.7682424783706665, "learning_rate": 8.836995352885303e-06, "loss": 0.8368, "step": 6934 }, { "epoch": 0.2449567087143305, "grad_norm": 1.5368684530258179, "learning_rate": 8.836628572338398e-06, "loss": 0.8126, "step": 6935 }, { "epoch": 0.24499203051803842, "grad_norm": 1.8589760065078735, "learning_rate": 8.83626174157842e-06, "loss": 0.851, "step": 6936 }, { "epoch": 0.2450273523217463, "grad_norm": 1.8599331378936768, "learning_rate": 8.83589486061017e-06, "loss": 0.8238, "step": 6937 }, { "epoch": 0.2450626741254542, "grad_norm": 1.7319239377975464, "learning_rate": 8.835527929438447e-06, "loss": 0.8144, "step": 6938 }, { "epoch": 0.24509799592916212, "grad_norm": 1.6600239276885986, "learning_rate": 8.835160948068057e-06, "loss": 0.8277, "step": 6939 }, { "epoch": 0.24513331773287003, "grad_norm": 1.8690437078475952, "learning_rate": 8.834793916503803e-06, "loss": 0.8535, "step": 6940 }, { "epoch": 0.24516863953657794, "grad_norm": 1.7257734537124634, "learning_rate": 8.834426834750484e-06, "loss": 0.8283, "step": 6941 }, { "epoch": 0.24520396134028585, "grad_norm": 1.9665441513061523, "learning_rate": 8.83405970281291e-06, "loss": 0.8515, "step": 6942 }, { "epoch": 0.24523928314399374, "grad_norm": 2.0156655311584473, "learning_rate": 8.83369252069588e-06, "loss": 0.8663, "step": 6943 }, { "epoch": 0.24527460494770165, "grad_norm": 1.7526781558990479, "learning_rate": 8.833325288404206e-06, "loss": 0.8336, "step": 6944 }, { "epoch": 0.24530992675140956, "grad_norm": 1.6900684833526611, "learning_rate": 8.832958005942692e-06, "loss": 0.823, "step": 6945 }, { "epoch": 0.24534524855511747, "grad_norm": 2.3769266605377197, "learning_rate": 8.832590673316141e-06, "loss": 0.8619, "step": 6946 }, { "epoch": 0.24538057035882538, "grad_norm": 1.6036664247512817, "learning_rate": 8.832223290529365e-06, "loss": 0.843, "step": 6947 }, { "epoch": 0.2454158921625333, "grad_norm": 1.8943878412246704, "learning_rate": 8.83185585758717e-06, "loss": 0.8782, "step": 6948 }, { "epoch": 0.2454512139662412, "grad_norm": 1.6117361783981323, "learning_rate": 8.831488374494368e-06, "loss": 0.8044, "step": 6949 }, { "epoch": 0.24548653576994908, "grad_norm": 1.6649993658065796, "learning_rate": 8.831120841255765e-06, "loss": 0.8258, "step": 6950 }, { "epoch": 0.245521857573657, "grad_norm": 1.894494652748108, "learning_rate": 8.830753257876174e-06, "loss": 0.8164, "step": 6951 }, { "epoch": 0.2455571793773649, "grad_norm": 1.7724342346191406, "learning_rate": 8.830385624360403e-06, "loss": 0.8793, "step": 6952 }, { "epoch": 0.24559250118107281, "grad_norm": 1.6906788349151611, "learning_rate": 8.830017940713265e-06, "loss": 0.8217, "step": 6953 }, { "epoch": 0.24562782298478072, "grad_norm": 1.8858221769332886, "learning_rate": 8.829650206939572e-06, "loss": 0.8887, "step": 6954 }, { "epoch": 0.24566314478848864, "grad_norm": 1.5882872343063354, "learning_rate": 8.829282423044139e-06, "loss": 0.8318, "step": 6955 }, { "epoch": 0.24569846659219652, "grad_norm": 2.0322470664978027, "learning_rate": 8.828914589031774e-06, "loss": 0.8596, "step": 6956 }, { "epoch": 0.24573378839590443, "grad_norm": 1.636507272720337, "learning_rate": 8.828546704907296e-06, "loss": 0.8038, "step": 6957 }, { "epoch": 0.24576911019961234, "grad_norm": 1.6962263584136963, "learning_rate": 8.82817877067552e-06, "loss": 0.8609, "step": 6958 }, { "epoch": 0.24580443200332025, "grad_norm": 1.8053733110427856, "learning_rate": 8.82781078634126e-06, "loss": 0.8379, "step": 6959 }, { "epoch": 0.24583975380702816, "grad_norm": 1.7519643306732178, "learning_rate": 8.827442751909329e-06, "loss": 0.8216, "step": 6960 }, { "epoch": 0.24587507561073607, "grad_norm": 1.5518627166748047, "learning_rate": 8.82707466738455e-06, "loss": 0.8129, "step": 6961 }, { "epoch": 0.24591039741444398, "grad_norm": 1.5868529081344604, "learning_rate": 8.826706532771736e-06, "loss": 0.8134, "step": 6962 }, { "epoch": 0.24594571921815186, "grad_norm": 1.9822170734405518, "learning_rate": 8.826338348075706e-06, "loss": 0.8321, "step": 6963 }, { "epoch": 0.24598104102185978, "grad_norm": 1.8574961423873901, "learning_rate": 8.825970113301278e-06, "loss": 0.8406, "step": 6964 }, { "epoch": 0.24601636282556769, "grad_norm": 1.5404695272445679, "learning_rate": 8.825601828453275e-06, "loss": 0.8342, "step": 6965 }, { "epoch": 0.2460516846292756, "grad_norm": 1.7220655679702759, "learning_rate": 8.825233493536512e-06, "loss": 0.8497, "step": 6966 }, { "epoch": 0.2460870064329835, "grad_norm": 1.6457688808441162, "learning_rate": 8.824865108555814e-06, "loss": 0.8406, "step": 6967 }, { "epoch": 0.24612232823669142, "grad_norm": 1.6223411560058594, "learning_rate": 8.824496673515999e-06, "loss": 0.8326, "step": 6968 }, { "epoch": 0.2461576500403993, "grad_norm": 1.8208016157150269, "learning_rate": 8.82412818842189e-06, "loss": 0.8565, "step": 6969 }, { "epoch": 0.2461929718441072, "grad_norm": 1.630527138710022, "learning_rate": 8.82375965327831e-06, "loss": 0.8389, "step": 6970 }, { "epoch": 0.24622829364781512, "grad_norm": 1.6026607751846313, "learning_rate": 8.823391068090083e-06, "loss": 0.8137, "step": 6971 }, { "epoch": 0.24626361545152303, "grad_norm": 1.8355679512023926, "learning_rate": 8.823022432862034e-06, "loss": 0.8753, "step": 6972 }, { "epoch": 0.24629893725523094, "grad_norm": 1.7479133605957031, "learning_rate": 8.822653747598983e-06, "loss": 0.8358, "step": 6973 }, { "epoch": 0.24633425905893885, "grad_norm": 1.5722253322601318, "learning_rate": 8.822285012305762e-06, "loss": 0.831, "step": 6974 }, { "epoch": 0.24636958086264676, "grad_norm": 1.7518941164016724, "learning_rate": 8.821916226987191e-06, "loss": 0.8475, "step": 6975 }, { "epoch": 0.24640490266635465, "grad_norm": 1.8920738697052002, "learning_rate": 8.8215473916481e-06, "loss": 0.7919, "step": 6976 }, { "epoch": 0.24644022447006256, "grad_norm": 1.74713933467865, "learning_rate": 8.821178506293313e-06, "loss": 0.8476, "step": 6977 }, { "epoch": 0.24647554627377047, "grad_norm": 1.7628216743469238, "learning_rate": 8.820809570927662e-06, "loss": 0.834, "step": 6978 }, { "epoch": 0.24651086807747838, "grad_norm": 1.5934805870056152, "learning_rate": 8.820440585555974e-06, "loss": 0.8522, "step": 6979 }, { "epoch": 0.2465461898811863, "grad_norm": 1.6955811977386475, "learning_rate": 8.820071550183076e-06, "loss": 0.848, "step": 6980 }, { "epoch": 0.2465815116848942, "grad_norm": 1.847771167755127, "learning_rate": 8.819702464813799e-06, "loss": 0.8621, "step": 6981 }, { "epoch": 0.24661683348860208, "grad_norm": 1.4995399713516235, "learning_rate": 8.819333329452978e-06, "loss": 0.8389, "step": 6982 }, { "epoch": 0.24665215529231, "grad_norm": 1.6275471448898315, "learning_rate": 8.818964144105437e-06, "loss": 0.8271, "step": 6983 }, { "epoch": 0.2466874770960179, "grad_norm": 1.818442940711975, "learning_rate": 8.818594908776012e-06, "loss": 0.8611, "step": 6984 }, { "epoch": 0.24672279889972581, "grad_norm": 1.931593894958496, "learning_rate": 8.818225623469534e-06, "loss": 0.8496, "step": 6985 }, { "epoch": 0.24675812070343373, "grad_norm": 1.7004413604736328, "learning_rate": 8.817856288190838e-06, "loss": 0.8297, "step": 6986 }, { "epoch": 0.24679344250714164, "grad_norm": 1.2451361417770386, "learning_rate": 8.817486902944757e-06, "loss": 0.6425, "step": 6987 }, { "epoch": 0.24682876431084955, "grad_norm": 1.7133915424346924, "learning_rate": 8.817117467736122e-06, "loss": 0.8303, "step": 6988 }, { "epoch": 0.24686408611455743, "grad_norm": 1.8835935592651367, "learning_rate": 8.816747982569774e-06, "loss": 0.8751, "step": 6989 }, { "epoch": 0.24689940791826534, "grad_norm": 1.7236740589141846, "learning_rate": 8.816378447450546e-06, "loss": 0.8372, "step": 6990 }, { "epoch": 0.24693472972197325, "grad_norm": 1.7942813634872437, "learning_rate": 8.816008862383273e-06, "loss": 0.8609, "step": 6991 }, { "epoch": 0.24697005152568116, "grad_norm": 1.6559635400772095, "learning_rate": 8.815639227372794e-06, "loss": 0.8546, "step": 6992 }, { "epoch": 0.24700537332938907, "grad_norm": 1.9372543096542358, "learning_rate": 8.815269542423945e-06, "loss": 0.8118, "step": 6993 }, { "epoch": 0.24704069513309698, "grad_norm": 1.7380298376083374, "learning_rate": 8.814899807541566e-06, "loss": 0.8236, "step": 6994 }, { "epoch": 0.24707601693680487, "grad_norm": 1.6101402044296265, "learning_rate": 8.814530022730497e-06, "loss": 0.841, "step": 6995 }, { "epoch": 0.24711133874051278, "grad_norm": 1.559287190437317, "learning_rate": 8.814160187995574e-06, "loss": 0.8329, "step": 6996 }, { "epoch": 0.2471466605442207, "grad_norm": 1.9351204633712769, "learning_rate": 8.813790303341641e-06, "loss": 0.8509, "step": 6997 }, { "epoch": 0.2471819823479286, "grad_norm": 1.710610032081604, "learning_rate": 8.813420368773535e-06, "loss": 0.8214, "step": 6998 }, { "epoch": 0.2472173041516365, "grad_norm": 1.7267167568206787, "learning_rate": 8.813050384296102e-06, "loss": 0.8481, "step": 6999 }, { "epoch": 0.24725262595534442, "grad_norm": 1.716145634651184, "learning_rate": 8.812680349914182e-06, "loss": 0.813, "step": 7000 }, { "epoch": 0.24728794775905233, "grad_norm": 1.7356135845184326, "learning_rate": 8.812310265632618e-06, "loss": 0.8413, "step": 7001 }, { "epoch": 0.2473232695627602, "grad_norm": 3.5286004543304443, "learning_rate": 8.811940131456256e-06, "loss": 0.8169, "step": 7002 }, { "epoch": 0.24735859136646812, "grad_norm": 1.664290428161621, "learning_rate": 8.811569947389935e-06, "loss": 0.869, "step": 7003 }, { "epoch": 0.24739391317017603, "grad_norm": 1.7116037607192993, "learning_rate": 8.811199713438504e-06, "loss": 0.8379, "step": 7004 }, { "epoch": 0.24742923497388394, "grad_norm": 1.5755350589752197, "learning_rate": 8.810829429606808e-06, "loss": 0.7966, "step": 7005 }, { "epoch": 0.24746455677759185, "grad_norm": 1.6142544746398926, "learning_rate": 8.810459095899692e-06, "loss": 0.8953, "step": 7006 }, { "epoch": 0.24749987858129976, "grad_norm": 1.5645787715911865, "learning_rate": 8.810088712322006e-06, "loss": 0.8128, "step": 7007 }, { "epoch": 0.24753520038500765, "grad_norm": 1.9002044200897217, "learning_rate": 8.809718278878594e-06, "loss": 0.8259, "step": 7008 }, { "epoch": 0.24757052218871556, "grad_norm": 1.6281484365463257, "learning_rate": 8.809347795574304e-06, "loss": 0.8503, "step": 7009 }, { "epoch": 0.24760584399242347, "grad_norm": 1.6441015005111694, "learning_rate": 8.808977262413987e-06, "loss": 0.8416, "step": 7010 }, { "epoch": 0.24764116579613138, "grad_norm": 1.7207632064819336, "learning_rate": 8.808606679402491e-06, "loss": 0.8545, "step": 7011 }, { "epoch": 0.2476764875998393, "grad_norm": 1.7791252136230469, "learning_rate": 8.808236046544667e-06, "loss": 0.8431, "step": 7012 }, { "epoch": 0.2477118094035472, "grad_norm": 1.523136019706726, "learning_rate": 8.807865363845367e-06, "loss": 0.8035, "step": 7013 }, { "epoch": 0.2477471312072551, "grad_norm": 1.7461801767349243, "learning_rate": 8.80749463130944e-06, "loss": 0.8341, "step": 7014 }, { "epoch": 0.247782453010963, "grad_norm": 1.7187002897262573, "learning_rate": 8.807123848941738e-06, "loss": 0.8334, "step": 7015 }, { "epoch": 0.2478177748146709, "grad_norm": 1.8738014698028564, "learning_rate": 8.806753016747115e-06, "loss": 0.8332, "step": 7016 }, { "epoch": 0.24785309661837882, "grad_norm": 1.8433544635772705, "learning_rate": 8.806382134730424e-06, "loss": 0.844, "step": 7017 }, { "epoch": 0.24788841842208673, "grad_norm": 1.7012357711791992, "learning_rate": 8.80601120289652e-06, "loss": 0.8035, "step": 7018 }, { "epoch": 0.24792374022579464, "grad_norm": 1.6125823259353638, "learning_rate": 8.805640221250256e-06, "loss": 0.8345, "step": 7019 }, { "epoch": 0.24795906202950255, "grad_norm": 1.7204633951187134, "learning_rate": 8.805269189796489e-06, "loss": 0.8785, "step": 7020 }, { "epoch": 0.24799438383321043, "grad_norm": 1.7128925323486328, "learning_rate": 8.804898108540075e-06, "loss": 0.8388, "step": 7021 }, { "epoch": 0.24802970563691834, "grad_norm": 1.7191475629806519, "learning_rate": 8.804526977485868e-06, "loss": 0.8287, "step": 7022 }, { "epoch": 0.24806502744062625, "grad_norm": 1.6795787811279297, "learning_rate": 8.804155796638727e-06, "loss": 0.8441, "step": 7023 }, { "epoch": 0.24810034924433416, "grad_norm": 1.7450429201126099, "learning_rate": 8.80378456600351e-06, "loss": 0.8706, "step": 7024 }, { "epoch": 0.24813567104804207, "grad_norm": 1.9537841081619263, "learning_rate": 8.803413285585078e-06, "loss": 0.8581, "step": 7025 }, { "epoch": 0.24817099285174998, "grad_norm": 1.8355884552001953, "learning_rate": 8.803041955388286e-06, "loss": 0.8728, "step": 7026 }, { "epoch": 0.2482063146554579, "grad_norm": 1.6145834922790527, "learning_rate": 8.802670575417995e-06, "loss": 0.8459, "step": 7027 }, { "epoch": 0.24824163645916578, "grad_norm": 1.6456390619277954, "learning_rate": 8.802299145679068e-06, "loss": 0.8351, "step": 7028 }, { "epoch": 0.2482769582628737, "grad_norm": 1.7985411882400513, "learning_rate": 8.801927666176365e-06, "loss": 0.8296, "step": 7029 }, { "epoch": 0.2483122800665816, "grad_norm": 1.7255409955978394, "learning_rate": 8.801556136914746e-06, "loss": 0.799, "step": 7030 }, { "epoch": 0.2483476018702895, "grad_norm": 1.6221013069152832, "learning_rate": 8.801184557899073e-06, "loss": 0.8484, "step": 7031 }, { "epoch": 0.24838292367399742, "grad_norm": 1.6507169008255005, "learning_rate": 8.800812929134214e-06, "loss": 0.8407, "step": 7032 }, { "epoch": 0.24841824547770533, "grad_norm": 1.6400189399719238, "learning_rate": 8.80044125062503e-06, "loss": 0.8208, "step": 7033 }, { "epoch": 0.2484535672814132, "grad_norm": 1.6844340562820435, "learning_rate": 8.800069522376385e-06, "loss": 0.8386, "step": 7034 }, { "epoch": 0.24848888908512112, "grad_norm": 2.7995407581329346, "learning_rate": 8.799697744393144e-06, "loss": 0.8436, "step": 7035 }, { "epoch": 0.24852421088882903, "grad_norm": 1.777637004852295, "learning_rate": 8.799325916680173e-06, "loss": 0.8542, "step": 7036 }, { "epoch": 0.24855953269253694, "grad_norm": 1.8829624652862549, "learning_rate": 8.798954039242338e-06, "loss": 0.8198, "step": 7037 }, { "epoch": 0.24859485449624485, "grad_norm": 1.8400431871414185, "learning_rate": 8.798582112084507e-06, "loss": 0.847, "step": 7038 }, { "epoch": 0.24863017629995277, "grad_norm": 4.9502482414245605, "learning_rate": 8.798210135211548e-06, "loss": 0.8624, "step": 7039 }, { "epoch": 0.24866549810366068, "grad_norm": 1.6172274351119995, "learning_rate": 8.797838108628328e-06, "loss": 0.8749, "step": 7040 }, { "epoch": 0.24870081990736856, "grad_norm": 1.692838191986084, "learning_rate": 8.797466032339717e-06, "loss": 0.824, "step": 7041 }, { "epoch": 0.24873614171107647, "grad_norm": 1.630196452140808, "learning_rate": 8.797093906350585e-06, "loss": 0.796, "step": 7042 }, { "epoch": 0.24877146351478438, "grad_norm": 1.6107451915740967, "learning_rate": 8.7967217306658e-06, "loss": 0.8291, "step": 7043 }, { "epoch": 0.2488067853184923, "grad_norm": 1.861283779144287, "learning_rate": 8.796349505290237e-06, "loss": 0.8402, "step": 7044 }, { "epoch": 0.2488421071222002, "grad_norm": 1.7695046663284302, "learning_rate": 8.795977230228763e-06, "loss": 0.8554, "step": 7045 }, { "epoch": 0.2488774289259081, "grad_norm": 1.6155855655670166, "learning_rate": 8.795604905486254e-06, "loss": 0.8462, "step": 7046 }, { "epoch": 0.248912750729616, "grad_norm": 1.656355381011963, "learning_rate": 8.795232531067582e-06, "loss": 0.8329, "step": 7047 }, { "epoch": 0.2489480725333239, "grad_norm": 1.5084697008132935, "learning_rate": 8.79486010697762e-06, "loss": 0.8096, "step": 7048 }, { "epoch": 0.24898339433703182, "grad_norm": 1.7973628044128418, "learning_rate": 8.794487633221241e-06, "loss": 0.8779, "step": 7049 }, { "epoch": 0.24901871614073973, "grad_norm": 1.1258944272994995, "learning_rate": 8.794115109803323e-06, "loss": 0.6139, "step": 7050 }, { "epoch": 0.24905403794444764, "grad_norm": 1.8609230518341064, "learning_rate": 8.793742536728737e-06, "loss": 0.8278, "step": 7051 }, { "epoch": 0.24908935974815555, "grad_norm": 1.8803609609603882, "learning_rate": 8.793369914002364e-06, "loss": 0.8429, "step": 7052 }, { "epoch": 0.24912468155186346, "grad_norm": 1.772116780281067, "learning_rate": 8.792997241629078e-06, "loss": 0.8181, "step": 7053 }, { "epoch": 0.24916000335557134, "grad_norm": 1.5819157361984253, "learning_rate": 8.792624519613758e-06, "loss": 0.8212, "step": 7054 }, { "epoch": 0.24919532515927925, "grad_norm": 1.6253271102905273, "learning_rate": 8.79225174796128e-06, "loss": 0.8872, "step": 7055 }, { "epoch": 0.24923064696298716, "grad_norm": 1.7351202964782715, "learning_rate": 8.791878926676524e-06, "loss": 0.8525, "step": 7056 }, { "epoch": 0.24926596876669507, "grad_norm": 2.031532049179077, "learning_rate": 8.79150605576437e-06, "loss": 0.8402, "step": 7057 }, { "epoch": 0.24930129057040298, "grad_norm": 1.7085586786270142, "learning_rate": 8.791133135229698e-06, "loss": 0.8296, "step": 7058 }, { "epoch": 0.2493366123741109, "grad_norm": 1.6064025163650513, "learning_rate": 8.790760165077385e-06, "loss": 0.8495, "step": 7059 }, { "epoch": 0.24937193417781878, "grad_norm": 1.6767312288284302, "learning_rate": 8.790387145312318e-06, "loss": 0.8152, "step": 7060 }, { "epoch": 0.2494072559815267, "grad_norm": 1.6945483684539795, "learning_rate": 8.790014075939377e-06, "loss": 0.8429, "step": 7061 }, { "epoch": 0.2494425777852346, "grad_norm": 1.796704649925232, "learning_rate": 8.789640956963444e-06, "loss": 0.8364, "step": 7062 }, { "epoch": 0.2494778995889425, "grad_norm": 1.6936838626861572, "learning_rate": 8.789267788389403e-06, "loss": 0.8517, "step": 7063 }, { "epoch": 0.24951322139265042, "grad_norm": 2.238048791885376, "learning_rate": 8.788894570222135e-06, "loss": 0.8405, "step": 7064 }, { "epoch": 0.24954854319635833, "grad_norm": 1.671518325805664, "learning_rate": 8.788521302466529e-06, "loss": 0.8443, "step": 7065 }, { "epoch": 0.24958386500006624, "grad_norm": 1.6875126361846924, "learning_rate": 8.788147985127468e-06, "loss": 0.8285, "step": 7066 }, { "epoch": 0.24961918680377412, "grad_norm": 1.807346224784851, "learning_rate": 8.787774618209839e-06, "loss": 0.8414, "step": 7067 }, { "epoch": 0.24965450860748203, "grad_norm": 1.8060222864151, "learning_rate": 8.787401201718526e-06, "loss": 0.8752, "step": 7068 }, { "epoch": 0.24968983041118994, "grad_norm": 1.655098795890808, "learning_rate": 8.78702773565842e-06, "loss": 0.8532, "step": 7069 }, { "epoch": 0.24972515221489786, "grad_norm": 1.6847243309020996, "learning_rate": 8.786654220034405e-06, "loss": 0.8429, "step": 7070 }, { "epoch": 0.24976047401860577, "grad_norm": 1.7022908926010132, "learning_rate": 8.786280654851373e-06, "loss": 0.8203, "step": 7071 }, { "epoch": 0.24979579582231368, "grad_norm": 1.253366231918335, "learning_rate": 8.785907040114212e-06, "loss": 0.6413, "step": 7072 }, { "epoch": 0.24983111762602156, "grad_norm": 1.9206817150115967, "learning_rate": 8.78553337582781e-06, "loss": 0.8275, "step": 7073 }, { "epoch": 0.24986643942972947, "grad_norm": 1.97211492061615, "learning_rate": 8.785159661997058e-06, "loss": 0.8694, "step": 7074 }, { "epoch": 0.24990176123343738, "grad_norm": 1.6593973636627197, "learning_rate": 8.78478589862685e-06, "loss": 0.8756, "step": 7075 }, { "epoch": 0.2499370830371453, "grad_norm": 1.6895718574523926, "learning_rate": 8.784412085722075e-06, "loss": 0.833, "step": 7076 }, { "epoch": 0.2499724048408532, "grad_norm": 1.8041805028915405, "learning_rate": 8.784038223287626e-06, "loss": 0.8141, "step": 7077 }, { "epoch": 0.2500077266445611, "grad_norm": 1.8051131963729858, "learning_rate": 8.783664311328395e-06, "loss": 0.8681, "step": 7078 }, { "epoch": 0.250043048448269, "grad_norm": 1.847474455833435, "learning_rate": 8.783290349849277e-06, "loss": 0.8951, "step": 7079 }, { "epoch": 0.25007837025197693, "grad_norm": 1.9069898128509521, "learning_rate": 8.782916338855167e-06, "loss": 0.791, "step": 7080 }, { "epoch": 0.25011369205568484, "grad_norm": 1.8747732639312744, "learning_rate": 8.782542278350958e-06, "loss": 0.8445, "step": 7081 }, { "epoch": 0.25014901385939275, "grad_norm": 1.766296148300171, "learning_rate": 8.782168168341547e-06, "loss": 0.8275, "step": 7082 }, { "epoch": 0.2501843356631006, "grad_norm": 1.7447683811187744, "learning_rate": 8.781794008831831e-06, "loss": 0.8458, "step": 7083 }, { "epoch": 0.2502196574668085, "grad_norm": 1.8411109447479248, "learning_rate": 8.781419799826704e-06, "loss": 0.8223, "step": 7084 }, { "epoch": 0.25025497927051643, "grad_norm": 2.0218801498413086, "learning_rate": 8.781045541331068e-06, "loss": 0.8685, "step": 7085 }, { "epoch": 0.25029030107422434, "grad_norm": 1.8342008590698242, "learning_rate": 8.780671233349818e-06, "loss": 0.8221, "step": 7086 }, { "epoch": 0.25032562287793225, "grad_norm": 1.6635050773620605, "learning_rate": 8.780296875887853e-06, "loss": 0.8537, "step": 7087 }, { "epoch": 0.25036094468164016, "grad_norm": 1.8030182123184204, "learning_rate": 8.779922468950073e-06, "loss": 0.8839, "step": 7088 }, { "epoch": 0.2503962664853481, "grad_norm": 1.9037405252456665, "learning_rate": 8.779548012541377e-06, "loss": 0.8709, "step": 7089 }, { "epoch": 0.250431588289056, "grad_norm": 2.003222942352295, "learning_rate": 8.779173506666669e-06, "loss": 0.8493, "step": 7090 }, { "epoch": 0.2504669100927639, "grad_norm": 1.7062301635742188, "learning_rate": 8.778798951330847e-06, "loss": 0.8409, "step": 7091 }, { "epoch": 0.2505022318964718, "grad_norm": 1.8553767204284668, "learning_rate": 8.778424346538817e-06, "loss": 0.8632, "step": 7092 }, { "epoch": 0.2505375537001797, "grad_norm": 1.5694438219070435, "learning_rate": 8.778049692295478e-06, "loss": 0.8447, "step": 7093 }, { "epoch": 0.2505728755038876, "grad_norm": 1.7156860828399658, "learning_rate": 8.777674988605734e-06, "loss": 0.8283, "step": 7094 }, { "epoch": 0.25060819730759554, "grad_norm": 1.09752357006073, "learning_rate": 8.77730023547449e-06, "loss": 0.6199, "step": 7095 }, { "epoch": 0.2506435191113034, "grad_norm": 1.8111581802368164, "learning_rate": 8.776925432906653e-06, "loss": 0.8602, "step": 7096 }, { "epoch": 0.2506788409150113, "grad_norm": 1.6695812940597534, "learning_rate": 8.776550580907125e-06, "loss": 0.8368, "step": 7097 }, { "epoch": 0.2507141627187192, "grad_norm": 1.5302090644836426, "learning_rate": 8.776175679480812e-06, "loss": 0.8315, "step": 7098 }, { "epoch": 0.2507494845224271, "grad_norm": 1.7223480939865112, "learning_rate": 8.77580072863262e-06, "loss": 0.8641, "step": 7099 }, { "epoch": 0.25078480632613503, "grad_norm": 2.0648581981658936, "learning_rate": 8.77542572836746e-06, "loss": 0.8472, "step": 7100 }, { "epoch": 0.25082012812984295, "grad_norm": 1.6984179019927979, "learning_rate": 8.775050678690238e-06, "loss": 0.8398, "step": 7101 }, { "epoch": 0.25085544993355086, "grad_norm": 1.8435742855072021, "learning_rate": 8.774675579605861e-06, "loss": 0.8195, "step": 7102 }, { "epoch": 0.25089077173725877, "grad_norm": 1.7027243375778198, "learning_rate": 8.77430043111924e-06, "loss": 0.8369, "step": 7103 }, { "epoch": 0.2509260935409667, "grad_norm": 1.8478808403015137, "learning_rate": 8.773925233235285e-06, "loss": 0.826, "step": 7104 }, { "epoch": 0.2509614153446746, "grad_norm": 1.6331238746643066, "learning_rate": 8.773549985958908e-06, "loss": 0.8367, "step": 7105 }, { "epoch": 0.2509967371483825, "grad_norm": 1.980004072189331, "learning_rate": 8.773174689295014e-06, "loss": 0.8257, "step": 7106 }, { "epoch": 0.2510320589520904, "grad_norm": 1.7939136028289795, "learning_rate": 8.77279934324852e-06, "loss": 0.8385, "step": 7107 }, { "epoch": 0.2510673807557983, "grad_norm": 1.7237893342971802, "learning_rate": 8.772423947824337e-06, "loss": 0.8664, "step": 7108 }, { "epoch": 0.2511027025595062, "grad_norm": 1.6300840377807617, "learning_rate": 8.772048503027382e-06, "loss": 0.809, "step": 7109 }, { "epoch": 0.2511380243632141, "grad_norm": 1.67171049118042, "learning_rate": 8.771673008862562e-06, "loss": 0.8539, "step": 7110 }, { "epoch": 0.251173346166922, "grad_norm": 1.6478813886642456, "learning_rate": 8.771297465334795e-06, "loss": 0.827, "step": 7111 }, { "epoch": 0.2512086679706299, "grad_norm": 1.6066062450408936, "learning_rate": 8.770921872448996e-06, "loss": 0.8228, "step": 7112 }, { "epoch": 0.2512439897743378, "grad_norm": 1.6020302772521973, "learning_rate": 8.770546230210081e-06, "loss": 0.8429, "step": 7113 }, { "epoch": 0.2512793115780457, "grad_norm": 1.612375259399414, "learning_rate": 8.770170538622966e-06, "loss": 0.841, "step": 7114 }, { "epoch": 0.25131463338175364, "grad_norm": 1.7425005435943604, "learning_rate": 8.769794797692567e-06, "loss": 0.8287, "step": 7115 }, { "epoch": 0.25134995518546155, "grad_norm": 1.4852914810180664, "learning_rate": 8.769419007423803e-06, "loss": 0.8089, "step": 7116 }, { "epoch": 0.25138527698916946, "grad_norm": 1.650694727897644, "learning_rate": 8.769043167821592e-06, "loss": 0.8483, "step": 7117 }, { "epoch": 0.25142059879287737, "grad_norm": 1.7176153659820557, "learning_rate": 8.76866727889085e-06, "loss": 0.8331, "step": 7118 }, { "epoch": 0.2514559205965853, "grad_norm": 1.641636610031128, "learning_rate": 8.768291340636501e-06, "loss": 0.8346, "step": 7119 }, { "epoch": 0.2514912424002932, "grad_norm": 1.7800137996673584, "learning_rate": 8.767915353063464e-06, "loss": 0.8513, "step": 7120 }, { "epoch": 0.2515265642040011, "grad_norm": 1.7431796789169312, "learning_rate": 8.767539316176656e-06, "loss": 0.8212, "step": 7121 }, { "epoch": 0.25156188600770896, "grad_norm": 1.7587432861328125, "learning_rate": 8.767163229981006e-06, "loss": 0.8629, "step": 7122 }, { "epoch": 0.25159720781141687, "grad_norm": 1.6050176620483398, "learning_rate": 8.766787094481428e-06, "loss": 0.8427, "step": 7123 }, { "epoch": 0.2516325296151248, "grad_norm": 1.5170248746871948, "learning_rate": 8.76641090968285e-06, "loss": 0.8163, "step": 7124 }, { "epoch": 0.2516678514188327, "grad_norm": 1.7169948816299438, "learning_rate": 8.766034675590194e-06, "loss": 0.813, "step": 7125 }, { "epoch": 0.2517031732225406, "grad_norm": 1.6831884384155273, "learning_rate": 8.765658392208384e-06, "loss": 0.8587, "step": 7126 }, { "epoch": 0.2517384950262485, "grad_norm": 1.7058298587799072, "learning_rate": 8.765282059542347e-06, "loss": 0.8433, "step": 7127 }, { "epoch": 0.2517738168299564, "grad_norm": 1.5595309734344482, "learning_rate": 8.764905677597003e-06, "loss": 0.8417, "step": 7128 }, { "epoch": 0.25180913863366433, "grad_norm": 1.7866472005844116, "learning_rate": 8.764529246377283e-06, "loss": 0.8572, "step": 7129 }, { "epoch": 0.25184446043737224, "grad_norm": 1.6260966062545776, "learning_rate": 8.764152765888111e-06, "loss": 0.7942, "step": 7130 }, { "epoch": 0.25187978224108015, "grad_norm": 1.5380628108978271, "learning_rate": 8.763776236134415e-06, "loss": 0.8305, "step": 7131 }, { "epoch": 0.25191510404478806, "grad_norm": 1.7730510234832764, "learning_rate": 8.763399657121124e-06, "loss": 0.8416, "step": 7132 }, { "epoch": 0.251950425848496, "grad_norm": 2.146930694580078, "learning_rate": 8.763023028853166e-06, "loss": 0.8637, "step": 7133 }, { "epoch": 0.2519857476522039, "grad_norm": 1.7866475582122803, "learning_rate": 8.762646351335469e-06, "loss": 0.8252, "step": 7134 }, { "epoch": 0.25202106945591174, "grad_norm": 1.6283854246139526, "learning_rate": 8.762269624572963e-06, "loss": 0.8203, "step": 7135 }, { "epoch": 0.25205639125961965, "grad_norm": 1.5968222618103027, "learning_rate": 8.761892848570581e-06, "loss": 0.8196, "step": 7136 }, { "epoch": 0.25209171306332756, "grad_norm": 1.810297966003418, "learning_rate": 8.761516023333253e-06, "loss": 0.8426, "step": 7137 }, { "epoch": 0.25212703486703547, "grad_norm": 1.8228572607040405, "learning_rate": 8.76113914886591e-06, "loss": 0.841, "step": 7138 }, { "epoch": 0.2521623566707434, "grad_norm": 1.7156778573989868, "learning_rate": 8.760762225173484e-06, "loss": 0.8317, "step": 7139 }, { "epoch": 0.2521976784744513, "grad_norm": 1.7066513299942017, "learning_rate": 8.760385252260908e-06, "loss": 0.894, "step": 7140 }, { "epoch": 0.2522330002781592, "grad_norm": 1.5874947309494019, "learning_rate": 8.760008230133118e-06, "loss": 0.8279, "step": 7141 }, { "epoch": 0.2522683220818671, "grad_norm": 1.7548860311508179, "learning_rate": 8.759631158795049e-06, "loss": 0.8313, "step": 7142 }, { "epoch": 0.252303643885575, "grad_norm": 1.674248456954956, "learning_rate": 8.759254038251632e-06, "loss": 0.8749, "step": 7143 }, { "epoch": 0.25233896568928293, "grad_norm": 1.9323697090148926, "learning_rate": 8.758876868507804e-06, "loss": 0.8687, "step": 7144 }, { "epoch": 0.25237428749299085, "grad_norm": 1.7857732772827148, "learning_rate": 8.758499649568503e-06, "loss": 0.848, "step": 7145 }, { "epoch": 0.25240960929669876, "grad_norm": 1.667475700378418, "learning_rate": 8.758122381438665e-06, "loss": 0.8466, "step": 7146 }, { "epoch": 0.25244493110040667, "grad_norm": 1.7292466163635254, "learning_rate": 8.757745064123229e-06, "loss": 0.855, "step": 7147 }, { "epoch": 0.2524802529041145, "grad_norm": 1.7083837985992432, "learning_rate": 8.75736769762713e-06, "loss": 0.8438, "step": 7148 }, { "epoch": 0.25251557470782243, "grad_norm": 1.6594429016113281, "learning_rate": 8.756990281955312e-06, "loss": 0.8063, "step": 7149 }, { "epoch": 0.25255089651153034, "grad_norm": 1.6958208084106445, "learning_rate": 8.756612817112709e-06, "loss": 0.8455, "step": 7150 }, { "epoch": 0.25258621831523825, "grad_norm": 1.6166343688964844, "learning_rate": 8.756235303104264e-06, "loss": 0.8507, "step": 7151 }, { "epoch": 0.25262154011894616, "grad_norm": 1.727060079574585, "learning_rate": 8.755857739934917e-06, "loss": 0.8477, "step": 7152 }, { "epoch": 0.2526568619226541, "grad_norm": 1.586993932723999, "learning_rate": 8.75548012760961e-06, "loss": 0.8034, "step": 7153 }, { "epoch": 0.252692183726362, "grad_norm": 1.6457421779632568, "learning_rate": 8.755102466133285e-06, "loss": 0.8249, "step": 7154 }, { "epoch": 0.2527275055300699, "grad_norm": 1.9443323612213135, "learning_rate": 8.754724755510887e-06, "loss": 0.8141, "step": 7155 }, { "epoch": 0.2527628273337778, "grad_norm": 2.0878920555114746, "learning_rate": 8.754346995747353e-06, "loss": 0.8654, "step": 7156 }, { "epoch": 0.2527981491374857, "grad_norm": 1.7984546422958374, "learning_rate": 8.753969186847635e-06, "loss": 0.8466, "step": 7157 }, { "epoch": 0.2528334709411936, "grad_norm": 1.6842143535614014, "learning_rate": 8.753591328816671e-06, "loss": 0.7994, "step": 7158 }, { "epoch": 0.25286879274490154, "grad_norm": 1.8235565423965454, "learning_rate": 8.753213421659412e-06, "loss": 0.8226, "step": 7159 }, { "epoch": 0.25290411454860945, "grad_norm": 2.1059672832489014, "learning_rate": 8.7528354653808e-06, "loss": 0.8447, "step": 7160 }, { "epoch": 0.2529394363523173, "grad_norm": 1.792680025100708, "learning_rate": 8.752457459985782e-06, "loss": 0.8378, "step": 7161 }, { "epoch": 0.2529747581560252, "grad_norm": 1.9216047525405884, "learning_rate": 8.752079405479308e-06, "loss": 0.8625, "step": 7162 }, { "epoch": 0.2530100799597331, "grad_norm": 1.7790414094924927, "learning_rate": 8.751701301866322e-06, "loss": 0.8511, "step": 7163 }, { "epoch": 0.25304540176344104, "grad_norm": 1.587214469909668, "learning_rate": 8.751323149151777e-06, "loss": 0.8072, "step": 7164 }, { "epoch": 0.25308072356714895, "grad_norm": 1.7440987825393677, "learning_rate": 8.750944947340618e-06, "loss": 0.8259, "step": 7165 }, { "epoch": 0.25311604537085686, "grad_norm": 1.0536993741989136, "learning_rate": 8.750566696437797e-06, "loss": 0.6014, "step": 7166 }, { "epoch": 0.25315136717456477, "grad_norm": 2.089153289794922, "learning_rate": 8.750188396448265e-06, "loss": 0.8426, "step": 7167 }, { "epoch": 0.2531866889782727, "grad_norm": 1.6685601472854614, "learning_rate": 8.749810047376971e-06, "loss": 0.8276, "step": 7168 }, { "epoch": 0.2532220107819806, "grad_norm": 2.2099087238311768, "learning_rate": 8.74943164922887e-06, "loss": 0.8775, "step": 7169 }, { "epoch": 0.2532573325856885, "grad_norm": 1.8649464845657349, "learning_rate": 8.74905320200891e-06, "loss": 0.8382, "step": 7170 }, { "epoch": 0.2532926543893964, "grad_norm": 1.70451021194458, "learning_rate": 8.748674705722046e-06, "loss": 0.8441, "step": 7171 }, { "epoch": 0.2533279761931043, "grad_norm": 1.6421202421188354, "learning_rate": 8.748296160373235e-06, "loss": 0.827, "step": 7172 }, { "epoch": 0.25336329799681223, "grad_norm": 1.9406732320785522, "learning_rate": 8.747917565967428e-06, "loss": 0.8605, "step": 7173 }, { "epoch": 0.2533986198005201, "grad_norm": 3.262368679046631, "learning_rate": 8.747538922509578e-06, "loss": 0.8462, "step": 7174 }, { "epoch": 0.253433941604228, "grad_norm": 1.7861026525497437, "learning_rate": 8.747160230004645e-06, "loss": 0.8288, "step": 7175 }, { "epoch": 0.2534692634079359, "grad_norm": 1.6222138404846191, "learning_rate": 8.746781488457584e-06, "loss": 0.8398, "step": 7176 }, { "epoch": 0.2535045852116438, "grad_norm": 1.6059000492095947, "learning_rate": 8.74640269787335e-06, "loss": 0.8249, "step": 7177 }, { "epoch": 0.25353990701535173, "grad_norm": 1.8066853284835815, "learning_rate": 8.7460238582569e-06, "loss": 0.8303, "step": 7178 }, { "epoch": 0.25357522881905964, "grad_norm": 1.8312073945999146, "learning_rate": 8.745644969613196e-06, "loss": 0.8605, "step": 7179 }, { "epoch": 0.25361055062276755, "grad_norm": 1.8068499565124512, "learning_rate": 8.745266031947194e-06, "loss": 0.847, "step": 7180 }, { "epoch": 0.25364587242647546, "grad_norm": 1.7997783422470093, "learning_rate": 8.744887045263854e-06, "loss": 0.8633, "step": 7181 }, { "epoch": 0.25368119423018337, "grad_norm": 1.7803447246551514, "learning_rate": 8.744508009568136e-06, "loss": 0.8712, "step": 7182 }, { "epoch": 0.2537165160338913, "grad_norm": 1.6827446222305298, "learning_rate": 8.744128924865004e-06, "loss": 0.8364, "step": 7183 }, { "epoch": 0.2537518378375992, "grad_norm": 1.7212505340576172, "learning_rate": 8.743749791159413e-06, "loss": 0.8357, "step": 7184 }, { "epoch": 0.2537871596413071, "grad_norm": 1.716654658317566, "learning_rate": 8.743370608456329e-06, "loss": 0.825, "step": 7185 }, { "epoch": 0.253822481445015, "grad_norm": 2.2289836406707764, "learning_rate": 8.742991376760717e-06, "loss": 0.874, "step": 7186 }, { "epoch": 0.25385780324872287, "grad_norm": 1.7275630235671997, "learning_rate": 8.742612096077534e-06, "loss": 0.8287, "step": 7187 }, { "epoch": 0.2538931250524308, "grad_norm": 1.685719609260559, "learning_rate": 8.742232766411748e-06, "loss": 0.8066, "step": 7188 }, { "epoch": 0.2539284468561387, "grad_norm": 1.5889936685562134, "learning_rate": 8.741853387768325e-06, "loss": 0.8294, "step": 7189 }, { "epoch": 0.2539637686598466, "grad_norm": 1.6677881479263306, "learning_rate": 8.741473960152228e-06, "loss": 0.7844, "step": 7190 }, { "epoch": 0.2539990904635545, "grad_norm": 1.6843420267105103, "learning_rate": 8.741094483568421e-06, "loss": 0.7961, "step": 7191 }, { "epoch": 0.2540344122672624, "grad_norm": 1.92566978931427, "learning_rate": 8.740714958021874e-06, "loss": 0.8547, "step": 7192 }, { "epoch": 0.25406973407097033, "grad_norm": 2.0287585258483887, "learning_rate": 8.740335383517552e-06, "loss": 0.8488, "step": 7193 }, { "epoch": 0.25410505587467824, "grad_norm": 1.727178692817688, "learning_rate": 8.739955760060425e-06, "loss": 0.7901, "step": 7194 }, { "epoch": 0.25414037767838615, "grad_norm": 1.7821111679077148, "learning_rate": 8.73957608765546e-06, "loss": 0.8461, "step": 7195 }, { "epoch": 0.25417569948209406, "grad_norm": 1.719490647315979, "learning_rate": 8.739196366307624e-06, "loss": 0.8285, "step": 7196 }, { "epoch": 0.254211021285802, "grad_norm": 1.8145804405212402, "learning_rate": 8.738816596021893e-06, "loss": 0.8534, "step": 7197 }, { "epoch": 0.2542463430895099, "grad_norm": 1.7725257873535156, "learning_rate": 8.738436776803229e-06, "loss": 0.8422, "step": 7198 }, { "epoch": 0.2542816648932178, "grad_norm": 1.5332603454589844, "learning_rate": 8.73805690865661e-06, "loss": 0.843, "step": 7199 }, { "epoch": 0.25431698669692565, "grad_norm": 1.6363131999969482, "learning_rate": 8.737676991587003e-06, "loss": 0.8331, "step": 7200 }, { "epoch": 0.25435230850063356, "grad_norm": 1.5699491500854492, "learning_rate": 8.737297025599382e-06, "loss": 0.8449, "step": 7201 }, { "epoch": 0.25438763030434147, "grad_norm": 1.1176085472106934, "learning_rate": 8.736917010698722e-06, "loss": 0.6324, "step": 7202 }, { "epoch": 0.2544229521080494, "grad_norm": 1.647149682044983, "learning_rate": 8.736536946889994e-06, "loss": 0.8439, "step": 7203 }, { "epoch": 0.2544582739117573, "grad_norm": 1.6686345338821411, "learning_rate": 8.736156834178171e-06, "loss": 0.8846, "step": 7204 }, { "epoch": 0.2544935957154652, "grad_norm": 1.653377652168274, "learning_rate": 8.73577667256823e-06, "loss": 0.816, "step": 7205 }, { "epoch": 0.2545289175191731, "grad_norm": 1.7023944854736328, "learning_rate": 8.735396462065148e-06, "loss": 0.8598, "step": 7206 }, { "epoch": 0.254564239322881, "grad_norm": 1.7611500024795532, "learning_rate": 8.735016202673899e-06, "loss": 0.8679, "step": 7207 }, { "epoch": 0.25459956112658894, "grad_norm": 1.7552878856658936, "learning_rate": 8.734635894399458e-06, "loss": 0.8934, "step": 7208 }, { "epoch": 0.25463488293029685, "grad_norm": 1.9453436136245728, "learning_rate": 8.734255537246807e-06, "loss": 0.852, "step": 7209 }, { "epoch": 0.25467020473400476, "grad_norm": 1.7744807004928589, "learning_rate": 8.73387513122092e-06, "loss": 0.8253, "step": 7210 }, { "epoch": 0.25470552653771267, "grad_norm": 1.691125512123108, "learning_rate": 8.733494676326778e-06, "loss": 0.8784, "step": 7211 }, { "epoch": 0.2547408483414206, "grad_norm": 1.8647880554199219, "learning_rate": 8.733114172569359e-06, "loss": 0.8527, "step": 7212 }, { "epoch": 0.25477617014512843, "grad_norm": 1.6914029121398926, "learning_rate": 8.732733619953643e-06, "loss": 0.8314, "step": 7213 }, { "epoch": 0.25481149194883634, "grad_norm": 1.6184254884719849, "learning_rate": 8.732353018484611e-06, "loss": 0.8663, "step": 7214 }, { "epoch": 0.25484681375254425, "grad_norm": 1.8850599527359009, "learning_rate": 8.731972368167247e-06, "loss": 0.8239, "step": 7215 }, { "epoch": 0.25488213555625217, "grad_norm": 1.6783089637756348, "learning_rate": 8.731591669006527e-06, "loss": 0.8483, "step": 7216 }, { "epoch": 0.2549174573599601, "grad_norm": 1.9197949171066284, "learning_rate": 8.731210921007437e-06, "loss": 0.8157, "step": 7217 }, { "epoch": 0.254952779163668, "grad_norm": 1.8414984941482544, "learning_rate": 8.730830124174962e-06, "loss": 0.8102, "step": 7218 }, { "epoch": 0.2549881009673759, "grad_norm": 1.690307855606079, "learning_rate": 8.730449278514082e-06, "loss": 0.8349, "step": 7219 }, { "epoch": 0.2550234227710838, "grad_norm": 1.7309050559997559, "learning_rate": 8.730068384029784e-06, "loss": 0.8203, "step": 7220 }, { "epoch": 0.2550587445747917, "grad_norm": 1.6781597137451172, "learning_rate": 8.72968744072705e-06, "loss": 0.8544, "step": 7221 }, { "epoch": 0.25509406637849963, "grad_norm": 1.6899938583374023, "learning_rate": 8.729306448610871e-06, "loss": 0.8482, "step": 7222 }, { "epoch": 0.25512938818220754, "grad_norm": 1.6132991313934326, "learning_rate": 8.728925407686228e-06, "loss": 0.8666, "step": 7223 }, { "epoch": 0.25516470998591545, "grad_norm": 1.133599042892456, "learning_rate": 8.728544317958111e-06, "loss": 0.5994, "step": 7224 }, { "epoch": 0.25520003178962336, "grad_norm": 1.6478625535964966, "learning_rate": 8.728163179431507e-06, "loss": 0.8186, "step": 7225 }, { "epoch": 0.25523535359333127, "grad_norm": 1.7152230739593506, "learning_rate": 8.727781992111403e-06, "loss": 0.8, "step": 7226 }, { "epoch": 0.2552706753970391, "grad_norm": 1.639072060585022, "learning_rate": 8.727400756002791e-06, "loss": 0.8232, "step": 7227 }, { "epoch": 0.25530599720074704, "grad_norm": 1.7461897134780884, "learning_rate": 8.727019471110655e-06, "loss": 0.8331, "step": 7228 }, { "epoch": 0.25534131900445495, "grad_norm": 1.7982176542282104, "learning_rate": 8.726638137439994e-06, "loss": 0.8489, "step": 7229 }, { "epoch": 0.25537664080816286, "grad_norm": 1.6362342834472656, "learning_rate": 8.72625675499579e-06, "loss": 0.8043, "step": 7230 }, { "epoch": 0.25541196261187077, "grad_norm": 1.8136556148529053, "learning_rate": 8.725875323783039e-06, "loss": 0.8149, "step": 7231 }, { "epoch": 0.2554472844155787, "grad_norm": 1.7164112329483032, "learning_rate": 8.725493843806731e-06, "loss": 0.8756, "step": 7232 }, { "epoch": 0.2554826062192866, "grad_norm": 1.687369465827942, "learning_rate": 8.725112315071862e-06, "loss": 0.8535, "step": 7233 }, { "epoch": 0.2555179280229945, "grad_norm": 1.8821828365325928, "learning_rate": 8.724730737583422e-06, "loss": 0.8555, "step": 7234 }, { "epoch": 0.2555532498267024, "grad_norm": 1.6610671281814575, "learning_rate": 8.724349111346406e-06, "loss": 0.8568, "step": 7235 }, { "epoch": 0.2555885716304103, "grad_norm": 1.6554033756256104, "learning_rate": 8.72396743636581e-06, "loss": 0.8625, "step": 7236 }, { "epoch": 0.25562389343411823, "grad_norm": 1.5947686433792114, "learning_rate": 8.723585712646628e-06, "loss": 0.863, "step": 7237 }, { "epoch": 0.25565921523782614, "grad_norm": 1.6322910785675049, "learning_rate": 8.723203940193855e-06, "loss": 0.8127, "step": 7238 }, { "epoch": 0.25569453704153405, "grad_norm": 1.7416037321090698, "learning_rate": 8.72282211901249e-06, "loss": 0.8239, "step": 7239 }, { "epoch": 0.2557298588452419, "grad_norm": 1.700001835823059, "learning_rate": 8.722440249107527e-06, "loss": 0.8805, "step": 7240 }, { "epoch": 0.2557651806489498, "grad_norm": 1.6353695392608643, "learning_rate": 8.722058330483968e-06, "loss": 0.8438, "step": 7241 }, { "epoch": 0.25580050245265773, "grad_norm": 1.6577773094177246, "learning_rate": 8.72167636314681e-06, "loss": 0.8643, "step": 7242 }, { "epoch": 0.25583582425636564, "grad_norm": 1.6692103147506714, "learning_rate": 8.721294347101048e-06, "loss": 0.7782, "step": 7243 }, { "epoch": 0.25587114606007355, "grad_norm": 1.6165597438812256, "learning_rate": 8.720912282351688e-06, "loss": 0.8309, "step": 7244 }, { "epoch": 0.25590646786378146, "grad_norm": 1.7601990699768066, "learning_rate": 8.720530168903724e-06, "loss": 0.8779, "step": 7245 }, { "epoch": 0.25594178966748937, "grad_norm": 1.7608953714370728, "learning_rate": 8.720148006762163e-06, "loss": 0.8644, "step": 7246 }, { "epoch": 0.2559771114711973, "grad_norm": 1.844138503074646, "learning_rate": 8.719765795932005e-06, "loss": 0.845, "step": 7247 }, { "epoch": 0.2560124332749052, "grad_norm": 1.5217989683151245, "learning_rate": 8.71938353641825e-06, "loss": 0.8404, "step": 7248 }, { "epoch": 0.2560477550786131, "grad_norm": 1.768129825592041, "learning_rate": 8.719001228225904e-06, "loss": 0.8413, "step": 7249 }, { "epoch": 0.256083076882321, "grad_norm": 1.5036603212356567, "learning_rate": 8.718618871359968e-06, "loss": 0.816, "step": 7250 }, { "epoch": 0.2561183986860289, "grad_norm": 1.838362455368042, "learning_rate": 8.718236465825447e-06, "loss": 0.819, "step": 7251 }, { "epoch": 0.25615372048973684, "grad_norm": 1.6329883337020874, "learning_rate": 8.717854011627347e-06, "loss": 0.8246, "step": 7252 }, { "epoch": 0.2561890422934447, "grad_norm": 1.8223345279693604, "learning_rate": 8.717471508770672e-06, "loss": 0.8456, "step": 7253 }, { "epoch": 0.2562243640971526, "grad_norm": 1.6575018167495728, "learning_rate": 8.717088957260428e-06, "loss": 0.8708, "step": 7254 }, { "epoch": 0.2562596859008605, "grad_norm": 1.500656008720398, "learning_rate": 8.716706357101624e-06, "loss": 0.8188, "step": 7255 }, { "epoch": 0.2562950077045684, "grad_norm": 1.8133875131607056, "learning_rate": 8.716323708299265e-06, "loss": 0.8676, "step": 7256 }, { "epoch": 0.25633032950827633, "grad_norm": 1.670900821685791, "learning_rate": 8.71594101085836e-06, "loss": 0.843, "step": 7257 }, { "epoch": 0.25636565131198424, "grad_norm": 1.053865671157837, "learning_rate": 8.715558264783916e-06, "loss": 0.6009, "step": 7258 }, { "epoch": 0.25640097311569215, "grad_norm": 1.7509859800338745, "learning_rate": 8.715175470080947e-06, "loss": 0.8508, "step": 7259 }, { "epoch": 0.25643629491940007, "grad_norm": 1.7585289478302002, "learning_rate": 8.714792626754458e-06, "loss": 0.8255, "step": 7260 }, { "epoch": 0.256471616723108, "grad_norm": 1.8375283479690552, "learning_rate": 8.714409734809461e-06, "loss": 0.8438, "step": 7261 }, { "epoch": 0.2565069385268159, "grad_norm": 1.7649203538894653, "learning_rate": 8.714026794250968e-06, "loss": 0.8371, "step": 7262 }, { "epoch": 0.2565422603305238, "grad_norm": 1.5688732862472534, "learning_rate": 8.713643805083991e-06, "loss": 0.8099, "step": 7263 }, { "epoch": 0.2565775821342317, "grad_norm": 1.5708286762237549, "learning_rate": 8.71326076731354e-06, "loss": 0.8039, "step": 7264 }, { "epoch": 0.2566129039379396, "grad_norm": 1.884975552558899, "learning_rate": 8.712877680944634e-06, "loss": 0.848, "step": 7265 }, { "epoch": 0.2566482257416475, "grad_norm": 1.674369215965271, "learning_rate": 8.71249454598228e-06, "loss": 0.8318, "step": 7266 }, { "epoch": 0.2566835475453554, "grad_norm": 0.9690860509872437, "learning_rate": 8.712111362431496e-06, "loss": 0.5904, "step": 7267 }, { "epoch": 0.2567188693490633, "grad_norm": 1.906672477722168, "learning_rate": 8.711728130297296e-06, "loss": 0.8469, "step": 7268 }, { "epoch": 0.2567541911527712, "grad_norm": 1.8083863258361816, "learning_rate": 8.711344849584694e-06, "loss": 0.8155, "step": 7269 }, { "epoch": 0.2567895129564791, "grad_norm": 1.6716322898864746, "learning_rate": 8.710961520298712e-06, "loss": 0.859, "step": 7270 }, { "epoch": 0.256824834760187, "grad_norm": 1.7372877597808838, "learning_rate": 8.710578142444361e-06, "loss": 0.8261, "step": 7271 }, { "epoch": 0.25686015656389494, "grad_norm": 2.864574670791626, "learning_rate": 8.710194716026663e-06, "loss": 0.8364, "step": 7272 }, { "epoch": 0.25689547836760285, "grad_norm": 1.6009265184402466, "learning_rate": 8.709811241050633e-06, "loss": 0.8697, "step": 7273 }, { "epoch": 0.25693080017131076, "grad_norm": 1.6612597703933716, "learning_rate": 8.709427717521288e-06, "loss": 0.8152, "step": 7274 }, { "epoch": 0.25696612197501867, "grad_norm": 1.702107548713684, "learning_rate": 8.709044145443654e-06, "loss": 0.8298, "step": 7275 }, { "epoch": 0.2570014437787266, "grad_norm": 1.5710318088531494, "learning_rate": 8.708660524822745e-06, "loss": 0.8704, "step": 7276 }, { "epoch": 0.2570367655824345, "grad_norm": 1.839906930923462, "learning_rate": 8.708276855663585e-06, "loss": 0.859, "step": 7277 }, { "epoch": 0.2570720873861424, "grad_norm": 1.8498380184173584, "learning_rate": 8.707893137971195e-06, "loss": 0.846, "step": 7278 }, { "epoch": 0.25710740918985026, "grad_norm": 1.8218251466751099, "learning_rate": 8.707509371750598e-06, "loss": 0.8238, "step": 7279 }, { "epoch": 0.25714273099355817, "grad_norm": 1.5810400247573853, "learning_rate": 8.707125557006813e-06, "loss": 0.8004, "step": 7280 }, { "epoch": 0.2571780527972661, "grad_norm": 1.6303911209106445, "learning_rate": 8.706741693744866e-06, "loss": 0.8466, "step": 7281 }, { "epoch": 0.257213374600974, "grad_norm": 1.683790922164917, "learning_rate": 8.706357781969781e-06, "loss": 0.8377, "step": 7282 }, { "epoch": 0.2572486964046819, "grad_norm": 1.5724893808364868, "learning_rate": 8.705973821686581e-06, "loss": 0.8029, "step": 7283 }, { "epoch": 0.2572840182083898, "grad_norm": 1.6946532726287842, "learning_rate": 8.705589812900293e-06, "loss": 0.8562, "step": 7284 }, { "epoch": 0.2573193400120977, "grad_norm": 2.081223964691162, "learning_rate": 8.705205755615942e-06, "loss": 0.8299, "step": 7285 }, { "epoch": 0.25735466181580563, "grad_norm": 1.6222503185272217, "learning_rate": 8.704821649838555e-06, "loss": 0.8044, "step": 7286 }, { "epoch": 0.25738998361951354, "grad_norm": 1.6786127090454102, "learning_rate": 8.704437495573159e-06, "loss": 0.8451, "step": 7287 }, { "epoch": 0.25742530542322145, "grad_norm": 1.6619985103607178, "learning_rate": 8.704053292824782e-06, "loss": 0.8521, "step": 7288 }, { "epoch": 0.25746062722692936, "grad_norm": 1.699041485786438, "learning_rate": 8.70366904159845e-06, "loss": 0.8359, "step": 7289 }, { "epoch": 0.2574959490306373, "grad_norm": 1.8720790147781372, "learning_rate": 8.703284741899194e-06, "loss": 0.8456, "step": 7290 }, { "epoch": 0.2575312708343452, "grad_norm": 1.6882469654083252, "learning_rate": 8.702900393732043e-06, "loss": 0.8092, "step": 7291 }, { "epoch": 0.25756659263805304, "grad_norm": 1.5577276945114136, "learning_rate": 8.70251599710203e-06, "loss": 0.8357, "step": 7292 }, { "epoch": 0.25760191444176095, "grad_norm": 1.6290760040283203, "learning_rate": 8.702131552014181e-06, "loss": 0.8374, "step": 7293 }, { "epoch": 0.25763723624546886, "grad_norm": 1.8568156957626343, "learning_rate": 8.701747058473532e-06, "loss": 0.8641, "step": 7294 }, { "epoch": 0.25767255804917677, "grad_norm": 1.7419663667678833, "learning_rate": 8.701362516485113e-06, "loss": 0.7923, "step": 7295 }, { "epoch": 0.2577078798528847, "grad_norm": 1.7455500364303589, "learning_rate": 8.700977926053956e-06, "loss": 0.8475, "step": 7296 }, { "epoch": 0.2577432016565926, "grad_norm": 1.6409045457839966, "learning_rate": 8.700593287185098e-06, "loss": 0.8064, "step": 7297 }, { "epoch": 0.2577785234603005, "grad_norm": 1.7104860544204712, "learning_rate": 8.700208599883569e-06, "loss": 0.8287, "step": 7298 }, { "epoch": 0.2578138452640084, "grad_norm": 1.9544428586959839, "learning_rate": 8.699823864154404e-06, "loss": 0.8247, "step": 7299 }, { "epoch": 0.2578491670677163, "grad_norm": 1.7570137977600098, "learning_rate": 8.699439080002641e-06, "loss": 0.8304, "step": 7300 }, { "epoch": 0.25788448887142423, "grad_norm": 2.3535850048065186, "learning_rate": 8.699054247433317e-06, "loss": 0.8408, "step": 7301 }, { "epoch": 0.25791981067513214, "grad_norm": 1.752488136291504, "learning_rate": 8.698669366451465e-06, "loss": 0.8064, "step": 7302 }, { "epoch": 0.25795513247884005, "grad_norm": 2.2722251415252686, "learning_rate": 8.698284437062122e-06, "loss": 0.8652, "step": 7303 }, { "epoch": 0.25799045428254797, "grad_norm": 1.7964770793914795, "learning_rate": 8.697899459270328e-06, "loss": 0.8398, "step": 7304 }, { "epoch": 0.2580257760862558, "grad_norm": 1.7835930585861206, "learning_rate": 8.697514433081122e-06, "loss": 0.826, "step": 7305 }, { "epoch": 0.25806109788996373, "grad_norm": 1.6587332487106323, "learning_rate": 8.69712935849954e-06, "loss": 0.8225, "step": 7306 }, { "epoch": 0.25809641969367164, "grad_norm": 1.7174251079559326, "learning_rate": 8.696744235530628e-06, "loss": 0.8157, "step": 7307 }, { "epoch": 0.25813174149737955, "grad_norm": 1.7804118394851685, "learning_rate": 8.696359064179419e-06, "loss": 0.8496, "step": 7308 }, { "epoch": 0.25816706330108746, "grad_norm": 1.597902774810791, "learning_rate": 8.695973844450958e-06, "loss": 0.7979, "step": 7309 }, { "epoch": 0.2582023851047954, "grad_norm": 1.9101260900497437, "learning_rate": 8.695588576350287e-06, "loss": 0.8429, "step": 7310 }, { "epoch": 0.2582377069085033, "grad_norm": 2.1876916885375977, "learning_rate": 8.695203259882447e-06, "loss": 0.8319, "step": 7311 }, { "epoch": 0.2582730287122112, "grad_norm": 1.881895899772644, "learning_rate": 8.694817895052482e-06, "loss": 0.8841, "step": 7312 }, { "epoch": 0.2583083505159191, "grad_norm": 1.693243384361267, "learning_rate": 8.694432481865433e-06, "loss": 0.8128, "step": 7313 }, { "epoch": 0.258343672319627, "grad_norm": 1.71323561668396, "learning_rate": 8.694047020326348e-06, "loss": 0.8613, "step": 7314 }, { "epoch": 0.2583789941233349, "grad_norm": 2.731729745864868, "learning_rate": 8.69366151044027e-06, "loss": 0.8255, "step": 7315 }, { "epoch": 0.25841431592704284, "grad_norm": 1.7068482637405396, "learning_rate": 8.693275952212244e-06, "loss": 0.8253, "step": 7316 }, { "epoch": 0.25844963773075075, "grad_norm": 1.7354973554611206, "learning_rate": 8.692890345647316e-06, "loss": 0.8228, "step": 7317 }, { "epoch": 0.2584849595344586, "grad_norm": 1.851420283317566, "learning_rate": 8.692504690750533e-06, "loss": 0.826, "step": 7318 }, { "epoch": 0.2585202813381665, "grad_norm": 1.7390944957733154, "learning_rate": 8.692118987526945e-06, "loss": 0.8444, "step": 7319 }, { "epoch": 0.2585556031418744, "grad_norm": 1.767311692237854, "learning_rate": 8.691733235981596e-06, "loss": 0.8762, "step": 7320 }, { "epoch": 0.25859092494558233, "grad_norm": 1.7911152839660645, "learning_rate": 8.691347436119536e-06, "loss": 0.84, "step": 7321 }, { "epoch": 0.25862624674929025, "grad_norm": 1.7252131700515747, "learning_rate": 8.690961587945815e-06, "loss": 0.8286, "step": 7322 }, { "epoch": 0.25866156855299816, "grad_norm": 1.8140666484832764, "learning_rate": 8.690575691465483e-06, "loss": 0.8839, "step": 7323 }, { "epoch": 0.25869689035670607, "grad_norm": 1.865312099456787, "learning_rate": 8.69018974668359e-06, "loss": 0.852, "step": 7324 }, { "epoch": 0.258732212160414, "grad_norm": 1.9066962003707886, "learning_rate": 8.689803753605187e-06, "loss": 0.8432, "step": 7325 }, { "epoch": 0.2587675339641219, "grad_norm": 0.92445307970047, "learning_rate": 8.689417712235326e-06, "loss": 0.5913, "step": 7326 }, { "epoch": 0.2588028557678298, "grad_norm": 1.9322928190231323, "learning_rate": 8.689031622579061e-06, "loss": 0.8382, "step": 7327 }, { "epoch": 0.2588381775715377, "grad_norm": 1.8048133850097656, "learning_rate": 8.688645484641442e-06, "loss": 0.8609, "step": 7328 }, { "epoch": 0.2588734993752456, "grad_norm": 1.9526829719543457, "learning_rate": 8.688259298427524e-06, "loss": 0.8525, "step": 7329 }, { "epoch": 0.25890882117895353, "grad_norm": 1.550376296043396, "learning_rate": 8.687873063942363e-06, "loss": 0.8203, "step": 7330 }, { "epoch": 0.2589441429826614, "grad_norm": 1.651435136795044, "learning_rate": 8.687486781191012e-06, "loss": 0.8349, "step": 7331 }, { "epoch": 0.2589794647863693, "grad_norm": 1.8990375995635986, "learning_rate": 8.687100450178527e-06, "loss": 0.8587, "step": 7332 }, { "epoch": 0.2590147865900772, "grad_norm": 1.8662102222442627, "learning_rate": 8.686714070909966e-06, "loss": 0.8185, "step": 7333 }, { "epoch": 0.2590501083937851, "grad_norm": 1.8235905170440674, "learning_rate": 8.686327643390383e-06, "loss": 0.8549, "step": 7334 }, { "epoch": 0.259085430197493, "grad_norm": 1.7551980018615723, "learning_rate": 8.685941167624835e-06, "loss": 0.8356, "step": 7335 }, { "epoch": 0.25912075200120094, "grad_norm": 1.7238267660140991, "learning_rate": 8.685554643618384e-06, "loss": 0.8254, "step": 7336 }, { "epoch": 0.25915607380490885, "grad_norm": 1.629028558731079, "learning_rate": 8.685168071376084e-06, "loss": 0.8368, "step": 7337 }, { "epoch": 0.25919139560861676, "grad_norm": 1.9295427799224854, "learning_rate": 8.684781450902999e-06, "loss": 0.8141, "step": 7338 }, { "epoch": 0.25922671741232467, "grad_norm": 1.980766773223877, "learning_rate": 8.68439478220419e-06, "loss": 0.8579, "step": 7339 }, { "epoch": 0.2592620392160326, "grad_norm": 1.6552033424377441, "learning_rate": 8.68400806528471e-06, "loss": 0.8272, "step": 7340 }, { "epoch": 0.2592973610197405, "grad_norm": 1.6345003843307495, "learning_rate": 8.683621300149627e-06, "loss": 0.8065, "step": 7341 }, { "epoch": 0.2593326828234484, "grad_norm": 1.6918617486953735, "learning_rate": 8.683234486804e-06, "loss": 0.8125, "step": 7342 }, { "epoch": 0.2593680046271563, "grad_norm": 1.6617478132247925, "learning_rate": 8.682847625252894e-06, "loss": 0.8384, "step": 7343 }, { "epoch": 0.25940332643086417, "grad_norm": 1.628589153289795, "learning_rate": 8.68246071550137e-06, "loss": 0.8265, "step": 7344 }, { "epoch": 0.2594386482345721, "grad_norm": 1.7061035633087158, "learning_rate": 8.682073757554491e-06, "loss": 0.8351, "step": 7345 }, { "epoch": 0.25947397003828, "grad_norm": 1.592170238494873, "learning_rate": 8.681686751417324e-06, "loss": 0.8338, "step": 7346 }, { "epoch": 0.2595092918419879, "grad_norm": 1.5731980800628662, "learning_rate": 8.681299697094932e-06, "loss": 0.8413, "step": 7347 }, { "epoch": 0.2595446136456958, "grad_norm": 1.9062329530715942, "learning_rate": 8.680912594592383e-06, "loss": 0.8484, "step": 7348 }, { "epoch": 0.2595799354494037, "grad_norm": 1.7477020025253296, "learning_rate": 8.68052544391474e-06, "loss": 0.869, "step": 7349 }, { "epoch": 0.25961525725311163, "grad_norm": 1.5649741888046265, "learning_rate": 8.680138245067073e-06, "loss": 0.8214, "step": 7350 }, { "epoch": 0.25965057905681954, "grad_norm": 1.5377486944198608, "learning_rate": 8.679750998054449e-06, "loss": 0.8229, "step": 7351 }, { "epoch": 0.25968590086052745, "grad_norm": 1.730768084526062, "learning_rate": 8.679363702881935e-06, "loss": 0.8153, "step": 7352 }, { "epoch": 0.25972122266423536, "grad_norm": 1.863455891609192, "learning_rate": 8.6789763595546e-06, "loss": 0.8552, "step": 7353 }, { "epoch": 0.2597565444679433, "grad_norm": 1.8492522239685059, "learning_rate": 8.678588968077515e-06, "loss": 0.832, "step": 7354 }, { "epoch": 0.2597918662716512, "grad_norm": 1.606610655784607, "learning_rate": 8.67820152845575e-06, "loss": 0.8052, "step": 7355 }, { "epoch": 0.2598271880753591, "grad_norm": 1.55558180809021, "learning_rate": 8.677814040694372e-06, "loss": 0.8288, "step": 7356 }, { "epoch": 0.25986250987906695, "grad_norm": 1.7216590642929077, "learning_rate": 8.677426504798457e-06, "loss": 0.7958, "step": 7357 }, { "epoch": 0.25989783168277486, "grad_norm": 1.7671493291854858, "learning_rate": 8.677038920773075e-06, "loss": 0.8332, "step": 7358 }, { "epoch": 0.25993315348648277, "grad_norm": 1.6284794807434082, "learning_rate": 8.676651288623299e-06, "loss": 0.8261, "step": 7359 }, { "epoch": 0.2599684752901907, "grad_norm": 1.6767594814300537, "learning_rate": 8.6762636083542e-06, "loss": 0.8366, "step": 7360 }, { "epoch": 0.2600037970938986, "grad_norm": 1.7731938362121582, "learning_rate": 8.675875879970856e-06, "loss": 0.847, "step": 7361 }, { "epoch": 0.2600391188976065, "grad_norm": 1.563222050666809, "learning_rate": 8.67548810347834e-06, "loss": 0.8191, "step": 7362 }, { "epoch": 0.2600744407013144, "grad_norm": 1.8634556531906128, "learning_rate": 8.675100278881725e-06, "loss": 0.8341, "step": 7363 }, { "epoch": 0.2601097625050223, "grad_norm": 1.7337982654571533, "learning_rate": 8.67471240618609e-06, "loss": 0.8223, "step": 7364 }, { "epoch": 0.26014508430873023, "grad_norm": 1.7411943674087524, "learning_rate": 8.67432448539651e-06, "loss": 0.8705, "step": 7365 }, { "epoch": 0.26018040611243815, "grad_norm": 1.7406976222991943, "learning_rate": 8.67393651651806e-06, "loss": 0.8157, "step": 7366 }, { "epoch": 0.26021572791614606, "grad_norm": 1.7039214372634888, "learning_rate": 8.67354849955582e-06, "loss": 0.8467, "step": 7367 }, { "epoch": 0.26025104971985397, "grad_norm": 1.672124981880188, "learning_rate": 8.673160434514869e-06, "loss": 0.8433, "step": 7368 }, { "epoch": 0.2602863715235619, "grad_norm": 1.5206387042999268, "learning_rate": 8.672772321400282e-06, "loss": 0.8226, "step": 7369 }, { "epoch": 0.26032169332726973, "grad_norm": 1.6841516494750977, "learning_rate": 8.672384160217144e-06, "loss": 0.8588, "step": 7370 }, { "epoch": 0.26035701513097764, "grad_norm": 2.0354831218719482, "learning_rate": 8.671995950970531e-06, "loss": 0.861, "step": 7371 }, { "epoch": 0.26039233693468555, "grad_norm": 1.827973484992981, "learning_rate": 8.671607693665527e-06, "loss": 0.8398, "step": 7372 }, { "epoch": 0.26042765873839346, "grad_norm": 1.8038266897201538, "learning_rate": 8.671219388307211e-06, "loss": 0.8203, "step": 7373 }, { "epoch": 0.2604629805421014, "grad_norm": 1.5812088251113892, "learning_rate": 8.670831034900664e-06, "loss": 0.8473, "step": 7374 }, { "epoch": 0.2604983023458093, "grad_norm": 2.4839425086975098, "learning_rate": 8.670442633450971e-06, "loss": 0.8499, "step": 7375 }, { "epoch": 0.2605336241495172, "grad_norm": 1.8442330360412598, "learning_rate": 8.670054183963216e-06, "loss": 0.8322, "step": 7376 }, { "epoch": 0.2605689459532251, "grad_norm": 1.8199101686477661, "learning_rate": 8.66966568644248e-06, "loss": 0.8005, "step": 7377 }, { "epoch": 0.260604267756933, "grad_norm": 1.6927490234375, "learning_rate": 8.669277140893853e-06, "loss": 0.821, "step": 7378 }, { "epoch": 0.2606395895606409, "grad_norm": 1.7322733402252197, "learning_rate": 8.668888547322412e-06, "loss": 0.8196, "step": 7379 }, { "epoch": 0.26067491136434884, "grad_norm": 1.7723865509033203, "learning_rate": 8.66849990573325e-06, "loss": 0.8377, "step": 7380 }, { "epoch": 0.26071023316805675, "grad_norm": 1.8316619396209717, "learning_rate": 8.66811121613145e-06, "loss": 0.818, "step": 7381 }, { "epoch": 0.26074555497176466, "grad_norm": 1.7489734888076782, "learning_rate": 8.6677224785221e-06, "loss": 0.8377, "step": 7382 }, { "epoch": 0.2607808767754725, "grad_norm": 1.784321904182434, "learning_rate": 8.667333692910288e-06, "loss": 0.8509, "step": 7383 }, { "epoch": 0.2608161985791804, "grad_norm": 1.8269156217575073, "learning_rate": 8.666944859301102e-06, "loss": 0.8486, "step": 7384 }, { "epoch": 0.26085152038288834, "grad_norm": 1.6389966011047363, "learning_rate": 8.66655597769963e-06, "loss": 0.8357, "step": 7385 }, { "epoch": 0.26088684218659625, "grad_norm": 2.004007339477539, "learning_rate": 8.666167048110965e-06, "loss": 0.822, "step": 7386 }, { "epoch": 0.26092216399030416, "grad_norm": 1.9018810987472534, "learning_rate": 8.665778070540193e-06, "loss": 0.8598, "step": 7387 }, { "epoch": 0.26095748579401207, "grad_norm": 1.737518548965454, "learning_rate": 8.665389044992406e-06, "loss": 0.8444, "step": 7388 }, { "epoch": 0.26099280759772, "grad_norm": 1.7568278312683105, "learning_rate": 8.664999971472699e-06, "loss": 0.8289, "step": 7389 }, { "epoch": 0.2610281294014279, "grad_norm": 2.4685869216918945, "learning_rate": 8.664610849986159e-06, "loss": 0.8691, "step": 7390 }, { "epoch": 0.2610634512051358, "grad_norm": 1.7704249620437622, "learning_rate": 8.664221680537881e-06, "loss": 0.8307, "step": 7391 }, { "epoch": 0.2610987730088437, "grad_norm": 1.7090338468551636, "learning_rate": 8.66383246313296e-06, "loss": 0.8309, "step": 7392 }, { "epoch": 0.2611340948125516, "grad_norm": 2.0882327556610107, "learning_rate": 8.663443197776486e-06, "loss": 0.8553, "step": 7393 }, { "epoch": 0.26116941661625953, "grad_norm": 1.7447936534881592, "learning_rate": 8.663053884473558e-06, "loss": 0.8026, "step": 7394 }, { "epoch": 0.26120473841996744, "grad_norm": 1.6278200149536133, "learning_rate": 8.662664523229268e-06, "loss": 0.7818, "step": 7395 }, { "epoch": 0.2612400602236753, "grad_norm": 1.7116265296936035, "learning_rate": 8.662275114048716e-06, "loss": 0.8247, "step": 7396 }, { "epoch": 0.2612753820273832, "grad_norm": 1.878125548362732, "learning_rate": 8.661885656936992e-06, "loss": 0.8491, "step": 7397 }, { "epoch": 0.2613107038310911, "grad_norm": 1.7002917528152466, "learning_rate": 8.661496151899198e-06, "loss": 0.8529, "step": 7398 }, { "epoch": 0.26134602563479903, "grad_norm": 1.6171220541000366, "learning_rate": 8.661106598940431e-06, "loss": 0.8291, "step": 7399 }, { "epoch": 0.26138134743850694, "grad_norm": 1.7273732423782349, "learning_rate": 8.66071699806579e-06, "loss": 0.8619, "step": 7400 }, { "epoch": 0.26141666924221485, "grad_norm": 1.7646973133087158, "learning_rate": 8.660327349280373e-06, "loss": 0.8175, "step": 7401 }, { "epoch": 0.26145199104592276, "grad_norm": 1.670447587966919, "learning_rate": 8.659937652589278e-06, "loss": 0.8464, "step": 7402 }, { "epoch": 0.26148731284963067, "grad_norm": 1.666846513748169, "learning_rate": 8.65954790799761e-06, "loss": 0.8233, "step": 7403 }, { "epoch": 0.2615226346533386, "grad_norm": 1.6874247789382935, "learning_rate": 8.659158115510464e-06, "loss": 0.8357, "step": 7404 }, { "epoch": 0.2615579564570465, "grad_norm": 1.6936705112457275, "learning_rate": 8.658768275132945e-06, "loss": 0.8415, "step": 7405 }, { "epoch": 0.2615932782607544, "grad_norm": 1.7388437986373901, "learning_rate": 8.658378386870157e-06, "loss": 0.8182, "step": 7406 }, { "epoch": 0.2616286000644623, "grad_norm": 2.01920747756958, "learning_rate": 8.657988450727197e-06, "loss": 0.8921, "step": 7407 }, { "epoch": 0.2616639218681702, "grad_norm": 1.7852087020874023, "learning_rate": 8.657598466709173e-06, "loss": 0.8358, "step": 7408 }, { "epoch": 0.2616992436718781, "grad_norm": 1.8281831741333008, "learning_rate": 8.65720843482119e-06, "loss": 0.8172, "step": 7409 }, { "epoch": 0.261734565475586, "grad_norm": 2.5673201084136963, "learning_rate": 8.656818355068348e-06, "loss": 0.8809, "step": 7410 }, { "epoch": 0.2617698872792939, "grad_norm": 1.897737741470337, "learning_rate": 8.656428227455755e-06, "loss": 0.8627, "step": 7411 }, { "epoch": 0.2618052090830018, "grad_norm": 1.674548864364624, "learning_rate": 8.656038051988516e-06, "loss": 0.8524, "step": 7412 }, { "epoch": 0.2618405308867097, "grad_norm": 1.8924952745437622, "learning_rate": 8.65564782867174e-06, "loss": 0.8163, "step": 7413 }, { "epoch": 0.26187585269041763, "grad_norm": 1.724492073059082, "learning_rate": 8.655257557510532e-06, "loss": 0.8338, "step": 7414 }, { "epoch": 0.26191117449412554, "grad_norm": 1.906106948852539, "learning_rate": 8.65486723851e-06, "loss": 0.8319, "step": 7415 }, { "epoch": 0.26194649629783345, "grad_norm": 2.0712320804595947, "learning_rate": 8.654476871675253e-06, "loss": 0.8355, "step": 7416 }, { "epoch": 0.26198181810154136, "grad_norm": 1.8044140338897705, "learning_rate": 8.654086457011398e-06, "loss": 0.796, "step": 7417 }, { "epoch": 0.2620171399052493, "grad_norm": 1.5939394235610962, "learning_rate": 8.653695994523548e-06, "loss": 0.8345, "step": 7418 }, { "epoch": 0.2620524617089572, "grad_norm": 1.6471385955810547, "learning_rate": 8.653305484216812e-06, "loss": 0.8523, "step": 7419 }, { "epoch": 0.2620877835126651, "grad_norm": 1.9725241661071777, "learning_rate": 8.652914926096298e-06, "loss": 0.8442, "step": 7420 }, { "epoch": 0.262123105316373, "grad_norm": 1.9168781042099, "learning_rate": 8.652524320167122e-06, "loss": 0.816, "step": 7421 }, { "epoch": 0.26215842712008086, "grad_norm": 1.710967779159546, "learning_rate": 8.652133666434395e-06, "loss": 0.8429, "step": 7422 }, { "epoch": 0.26219374892378877, "grad_norm": 1.6122206449508667, "learning_rate": 8.651742964903229e-06, "loss": 0.7964, "step": 7423 }, { "epoch": 0.2622290707274967, "grad_norm": 1.7475312948226929, "learning_rate": 8.651352215578735e-06, "loss": 0.8107, "step": 7424 }, { "epoch": 0.2622643925312046, "grad_norm": 1.5882935523986816, "learning_rate": 8.650961418466032e-06, "loss": 0.8606, "step": 7425 }, { "epoch": 0.2622997143349125, "grad_norm": 1.809738278388977, "learning_rate": 8.650570573570232e-06, "loss": 0.8729, "step": 7426 }, { "epoch": 0.2623350361386204, "grad_norm": 1.743666648864746, "learning_rate": 8.65017968089645e-06, "loss": 0.8286, "step": 7427 }, { "epoch": 0.2623703579423283, "grad_norm": 1.8032286167144775, "learning_rate": 8.649788740449802e-06, "loss": 0.8418, "step": 7428 }, { "epoch": 0.26240567974603624, "grad_norm": 1.9202126264572144, "learning_rate": 8.649397752235407e-06, "loss": 0.8567, "step": 7429 }, { "epoch": 0.26244100154974415, "grad_norm": 1.6830605268478394, "learning_rate": 8.649006716258378e-06, "loss": 0.8319, "step": 7430 }, { "epoch": 0.26247632335345206, "grad_norm": 1.8113839626312256, "learning_rate": 8.648615632523835e-06, "loss": 0.8427, "step": 7431 }, { "epoch": 0.26251164515715997, "grad_norm": 1.215747594833374, "learning_rate": 8.648224501036899e-06, "loss": 0.6157, "step": 7432 }, { "epoch": 0.2625469669608679, "grad_norm": 1.6817961931228638, "learning_rate": 8.647833321802685e-06, "loss": 0.8402, "step": 7433 }, { "epoch": 0.2625822887645758, "grad_norm": 2.0106041431427, "learning_rate": 8.647442094826312e-06, "loss": 0.8535, "step": 7434 }, { "epoch": 0.26261761056828364, "grad_norm": 1.8176701068878174, "learning_rate": 8.647050820112907e-06, "loss": 0.8249, "step": 7435 }, { "epoch": 0.26265293237199155, "grad_norm": 1.7339872121810913, "learning_rate": 8.646659497667584e-06, "loss": 0.8146, "step": 7436 }, { "epoch": 0.26268825417569946, "grad_norm": 1.7086063623428345, "learning_rate": 8.646268127495467e-06, "loss": 0.8205, "step": 7437 }, { "epoch": 0.2627235759794074, "grad_norm": 1.5959354639053345, "learning_rate": 8.645876709601678e-06, "loss": 0.8114, "step": 7438 }, { "epoch": 0.2627588977831153, "grad_norm": 1.7641581296920776, "learning_rate": 8.64548524399134e-06, "loss": 0.8245, "step": 7439 }, { "epoch": 0.2627942195868232, "grad_norm": 1.9481124877929688, "learning_rate": 8.645093730669576e-06, "loss": 0.8321, "step": 7440 }, { "epoch": 0.2628295413905311, "grad_norm": 1.8600711822509766, "learning_rate": 8.644702169641512e-06, "loss": 0.8185, "step": 7441 }, { "epoch": 0.262864863194239, "grad_norm": 1.6153528690338135, "learning_rate": 8.64431056091227e-06, "loss": 0.8196, "step": 7442 }, { "epoch": 0.26290018499794693, "grad_norm": 1.7159627676010132, "learning_rate": 8.643918904486977e-06, "loss": 0.8753, "step": 7443 }, { "epoch": 0.26293550680165484, "grad_norm": 1.6473357677459717, "learning_rate": 8.643527200370758e-06, "loss": 0.8694, "step": 7444 }, { "epoch": 0.26297082860536275, "grad_norm": 1.6036237478256226, "learning_rate": 8.64313544856874e-06, "loss": 0.7982, "step": 7445 }, { "epoch": 0.26300615040907066, "grad_norm": 1.6826469898223877, "learning_rate": 8.642743649086049e-06, "loss": 0.8576, "step": 7446 }, { "epoch": 0.26304147221277857, "grad_norm": 1.9700393676757812, "learning_rate": 8.642351801927816e-06, "loss": 0.8402, "step": 7447 }, { "epoch": 0.2630767940164864, "grad_norm": 1.7254765033721924, "learning_rate": 8.641959907099166e-06, "loss": 0.8494, "step": 7448 }, { "epoch": 0.26311211582019434, "grad_norm": 1.7844908237457275, "learning_rate": 8.641567964605229e-06, "loss": 0.8575, "step": 7449 }, { "epoch": 0.26314743762390225, "grad_norm": 2.3900530338287354, "learning_rate": 8.641175974451136e-06, "loss": 0.8365, "step": 7450 }, { "epoch": 0.26318275942761016, "grad_norm": 1.5902892351150513, "learning_rate": 8.640783936642017e-06, "loss": 0.8162, "step": 7451 }, { "epoch": 0.26321808123131807, "grad_norm": 1.9023064374923706, "learning_rate": 8.640391851183e-06, "loss": 0.8449, "step": 7452 }, { "epoch": 0.263253403035026, "grad_norm": 1.7149162292480469, "learning_rate": 8.639999718079219e-06, "loss": 0.8914, "step": 7453 }, { "epoch": 0.2632887248387339, "grad_norm": 2.0129005908966064, "learning_rate": 8.639607537335808e-06, "loss": 0.7895, "step": 7454 }, { "epoch": 0.2633240466424418, "grad_norm": 1.594775915145874, "learning_rate": 8.639215308957896e-06, "loss": 0.806, "step": 7455 }, { "epoch": 0.2633593684461497, "grad_norm": 1.8395556211471558, "learning_rate": 8.638823032950619e-06, "loss": 0.8182, "step": 7456 }, { "epoch": 0.2633946902498576, "grad_norm": 1.95071542263031, "learning_rate": 8.638430709319109e-06, "loss": 0.8412, "step": 7457 }, { "epoch": 0.26343001205356553, "grad_norm": 1.680441975593567, "learning_rate": 8.638038338068505e-06, "loss": 0.8352, "step": 7458 }, { "epoch": 0.26346533385727344, "grad_norm": 1.9818464517593384, "learning_rate": 8.637645919203935e-06, "loss": 0.7997, "step": 7459 }, { "epoch": 0.26350065566098135, "grad_norm": 1.880043387413025, "learning_rate": 8.637253452730541e-06, "loss": 0.8212, "step": 7460 }, { "epoch": 0.2635359774646892, "grad_norm": 1.6492258310317993, "learning_rate": 8.636860938653458e-06, "loss": 0.8285, "step": 7461 }, { "epoch": 0.2635712992683971, "grad_norm": 1.6986836194992065, "learning_rate": 8.63646837697782e-06, "loss": 0.8443, "step": 7462 }, { "epoch": 0.26360662107210503, "grad_norm": 2.092930555343628, "learning_rate": 8.636075767708771e-06, "loss": 0.8529, "step": 7463 }, { "epoch": 0.26364194287581294, "grad_norm": 2.3299896717071533, "learning_rate": 8.635683110851445e-06, "loss": 0.8697, "step": 7464 }, { "epoch": 0.26367726467952085, "grad_norm": 1.8347711563110352, "learning_rate": 8.635290406410982e-06, "loss": 0.847, "step": 7465 }, { "epoch": 0.26371258648322876, "grad_norm": 1.8360897302627563, "learning_rate": 8.63489765439252e-06, "loss": 0.8608, "step": 7466 }, { "epoch": 0.26374790828693667, "grad_norm": 1.9931268692016602, "learning_rate": 8.6345048548012e-06, "loss": 0.8449, "step": 7467 }, { "epoch": 0.2637832300906446, "grad_norm": 1.6524927616119385, "learning_rate": 8.634112007642165e-06, "loss": 0.8363, "step": 7468 }, { "epoch": 0.2638185518943525, "grad_norm": 1.647849202156067, "learning_rate": 8.633719112920556e-06, "loss": 0.8484, "step": 7469 }, { "epoch": 0.2638538736980604, "grad_norm": 1.8471683263778687, "learning_rate": 8.633326170641515e-06, "loss": 0.834, "step": 7470 }, { "epoch": 0.2638891955017683, "grad_norm": 1.7232210636138916, "learning_rate": 8.63293318081018e-06, "loss": 0.8719, "step": 7471 }, { "epoch": 0.2639245173054762, "grad_norm": 1.6560368537902832, "learning_rate": 8.632540143431703e-06, "loss": 0.8032, "step": 7472 }, { "epoch": 0.26395983910918414, "grad_norm": 1.8274863958358765, "learning_rate": 8.632147058511222e-06, "loss": 0.8315, "step": 7473 }, { "epoch": 0.263995160912892, "grad_norm": 1.6823419332504272, "learning_rate": 8.631753926053883e-06, "loss": 0.8135, "step": 7474 }, { "epoch": 0.2640304827165999, "grad_norm": 1.7363612651824951, "learning_rate": 8.63136074606483e-06, "loss": 0.8429, "step": 7475 }, { "epoch": 0.2640658045203078, "grad_norm": 1.6312074661254883, "learning_rate": 8.630967518549212e-06, "loss": 0.8442, "step": 7476 }, { "epoch": 0.2641011263240157, "grad_norm": 1.7931981086730957, "learning_rate": 8.630574243512173e-06, "loss": 0.8157, "step": 7477 }, { "epoch": 0.26413644812772363, "grad_norm": 1.9540507793426514, "learning_rate": 8.63018092095886e-06, "loss": 0.8019, "step": 7478 }, { "epoch": 0.26417176993143154, "grad_norm": 1.674179196357727, "learning_rate": 8.629787550894421e-06, "loss": 0.8055, "step": 7479 }, { "epoch": 0.26420709173513945, "grad_norm": 1.947078824043274, "learning_rate": 8.629394133324007e-06, "loss": 0.8534, "step": 7480 }, { "epoch": 0.26424241353884736, "grad_norm": 1.6676068305969238, "learning_rate": 8.629000668252764e-06, "loss": 0.8177, "step": 7481 }, { "epoch": 0.2642777353425553, "grad_norm": 1.5266274213790894, "learning_rate": 8.628607155685841e-06, "loss": 0.7804, "step": 7482 }, { "epoch": 0.2643130571462632, "grad_norm": 0.945916473865509, "learning_rate": 8.628213595628394e-06, "loss": 0.5778, "step": 7483 }, { "epoch": 0.2643483789499711, "grad_norm": 1.8937267065048218, "learning_rate": 8.627819988085565e-06, "loss": 0.8567, "step": 7484 }, { "epoch": 0.264383700753679, "grad_norm": 0.9685860872268677, "learning_rate": 8.627426333062512e-06, "loss": 0.6045, "step": 7485 }, { "epoch": 0.2644190225573869, "grad_norm": 1.930229663848877, "learning_rate": 8.627032630564384e-06, "loss": 0.7942, "step": 7486 }, { "epoch": 0.2644543443610948, "grad_norm": 2.2525570392608643, "learning_rate": 8.626638880596335e-06, "loss": 0.8378, "step": 7487 }, { "epoch": 0.2644896661648027, "grad_norm": 1.6677287817001343, "learning_rate": 8.626245083163518e-06, "loss": 0.8232, "step": 7488 }, { "epoch": 0.2645249879685106, "grad_norm": 2.0956432819366455, "learning_rate": 8.625851238271087e-06, "loss": 0.8615, "step": 7489 }, { "epoch": 0.2645603097722185, "grad_norm": 1.7167891263961792, "learning_rate": 8.625457345924198e-06, "loss": 0.8471, "step": 7490 }, { "epoch": 0.2645956315759264, "grad_norm": 1.949984073638916, "learning_rate": 8.625063406128002e-06, "loss": 0.8885, "step": 7491 }, { "epoch": 0.2646309533796343, "grad_norm": 1.721747875213623, "learning_rate": 8.62466941888766e-06, "loss": 0.8162, "step": 7492 }, { "epoch": 0.26466627518334224, "grad_norm": 1.8164784908294678, "learning_rate": 8.624275384208325e-06, "loss": 0.8436, "step": 7493 }, { "epoch": 0.26470159698705015, "grad_norm": 1.6585036516189575, "learning_rate": 8.623881302095156e-06, "loss": 0.8341, "step": 7494 }, { "epoch": 0.26473691879075806, "grad_norm": 0.9971545934677124, "learning_rate": 8.62348717255331e-06, "loss": 0.5713, "step": 7495 }, { "epoch": 0.26477224059446597, "grad_norm": 1.945452332496643, "learning_rate": 8.623092995587942e-06, "loss": 0.8117, "step": 7496 }, { "epoch": 0.2648075623981739, "grad_norm": 1.8191003799438477, "learning_rate": 8.622698771204216e-06, "loss": 0.8378, "step": 7497 }, { "epoch": 0.2648428842018818, "grad_norm": 1.6630464792251587, "learning_rate": 8.62230449940729e-06, "loss": 0.8132, "step": 7498 }, { "epoch": 0.2648782060055897, "grad_norm": 1.7345118522644043, "learning_rate": 8.621910180202323e-06, "loss": 0.8404, "step": 7499 }, { "epoch": 0.26491352780929756, "grad_norm": 1.9177459478378296, "learning_rate": 8.621515813594477e-06, "loss": 0.8195, "step": 7500 }, { "epoch": 0.26494884961300547, "grad_norm": 1.959696888923645, "learning_rate": 8.621121399588911e-06, "loss": 0.8387, "step": 7501 }, { "epoch": 0.2649841714167134, "grad_norm": 1.6555423736572266, "learning_rate": 8.62072693819079e-06, "loss": 0.8599, "step": 7502 }, { "epoch": 0.2650194932204213, "grad_norm": 1.7410075664520264, "learning_rate": 8.620332429405276e-06, "loss": 0.8177, "step": 7503 }, { "epoch": 0.2650548150241292, "grad_norm": 1.6961164474487305, "learning_rate": 8.61993787323753e-06, "loss": 0.8397, "step": 7504 }, { "epoch": 0.2650901368278371, "grad_norm": 1.6862040758132935, "learning_rate": 8.619543269692721e-06, "loss": 0.8214, "step": 7505 }, { "epoch": 0.265125458631545, "grad_norm": 2.091578960418701, "learning_rate": 8.619148618776008e-06, "loss": 0.8529, "step": 7506 }, { "epoch": 0.26516078043525293, "grad_norm": 1.9159538745880127, "learning_rate": 8.618753920492558e-06, "loss": 0.8405, "step": 7507 }, { "epoch": 0.26519610223896084, "grad_norm": 1.7746562957763672, "learning_rate": 8.618359174847536e-06, "loss": 0.8492, "step": 7508 }, { "epoch": 0.26523142404266875, "grad_norm": 1.433154582977295, "learning_rate": 8.61796438184611e-06, "loss": 0.8274, "step": 7509 }, { "epoch": 0.26526674584637666, "grad_norm": 1.5561267137527466, "learning_rate": 8.617569541493447e-06, "loss": 0.8253, "step": 7510 }, { "epoch": 0.26530206765008457, "grad_norm": 1.5423530340194702, "learning_rate": 8.617174653794713e-06, "loss": 0.8625, "step": 7511 }, { "epoch": 0.2653373894537925, "grad_norm": 1.6277111768722534, "learning_rate": 8.616779718755079e-06, "loss": 0.8221, "step": 7512 }, { "epoch": 0.26537271125750034, "grad_norm": 1.6283016204833984, "learning_rate": 8.61638473637971e-06, "loss": 0.8436, "step": 7513 }, { "epoch": 0.26540803306120825, "grad_norm": 1.7112634181976318, "learning_rate": 8.615989706673777e-06, "loss": 0.8225, "step": 7514 }, { "epoch": 0.26544335486491616, "grad_norm": 1.6108615398406982, "learning_rate": 8.615594629642453e-06, "loss": 0.8526, "step": 7515 }, { "epoch": 0.26547867666862407, "grad_norm": 1.7845726013183594, "learning_rate": 8.615199505290904e-06, "loss": 0.8913, "step": 7516 }, { "epoch": 0.265513998472332, "grad_norm": 1.7665824890136719, "learning_rate": 8.614804333624303e-06, "loss": 0.8484, "step": 7517 }, { "epoch": 0.2655493202760399, "grad_norm": 1.8562355041503906, "learning_rate": 8.614409114647823e-06, "loss": 0.8579, "step": 7518 }, { "epoch": 0.2655846420797478, "grad_norm": 1.6378880739212036, "learning_rate": 8.614013848366638e-06, "loss": 0.8237, "step": 7519 }, { "epoch": 0.2656199638834557, "grad_norm": 1.6529600620269775, "learning_rate": 8.613618534785918e-06, "loss": 0.8385, "step": 7520 }, { "epoch": 0.2656552856871636, "grad_norm": 1.6527537107467651, "learning_rate": 8.613223173910835e-06, "loss": 0.8521, "step": 7521 }, { "epoch": 0.26569060749087153, "grad_norm": 1.6041381359100342, "learning_rate": 8.61282776574657e-06, "loss": 0.8311, "step": 7522 }, { "epoch": 0.26572592929457944, "grad_norm": 1.5914453268051147, "learning_rate": 8.612432310298292e-06, "loss": 0.8155, "step": 7523 }, { "epoch": 0.26576125109828735, "grad_norm": 1.7568519115447998, "learning_rate": 8.61203680757118e-06, "loss": 0.839, "step": 7524 }, { "epoch": 0.26579657290199526, "grad_norm": 1.6271389722824097, "learning_rate": 8.61164125757041e-06, "loss": 0.8216, "step": 7525 }, { "epoch": 0.2658318947057031, "grad_norm": 1.626373529434204, "learning_rate": 8.611245660301156e-06, "loss": 0.8104, "step": 7526 }, { "epoch": 0.26586721650941103, "grad_norm": 1.7215633392333984, "learning_rate": 8.6108500157686e-06, "loss": 0.8314, "step": 7527 }, { "epoch": 0.26590253831311894, "grad_norm": 1.6535112857818604, "learning_rate": 8.610454323977916e-06, "loss": 0.8408, "step": 7528 }, { "epoch": 0.26593786011682685, "grad_norm": 2.0258538722991943, "learning_rate": 8.610058584934286e-06, "loss": 0.8411, "step": 7529 }, { "epoch": 0.26597318192053476, "grad_norm": 1.6182647943496704, "learning_rate": 8.609662798642886e-06, "loss": 0.8243, "step": 7530 }, { "epoch": 0.2660085037242427, "grad_norm": 1.6333562135696411, "learning_rate": 8.6092669651089e-06, "loss": 0.8354, "step": 7531 }, { "epoch": 0.2660438255279506, "grad_norm": 1.7083903551101685, "learning_rate": 8.608871084337504e-06, "loss": 0.8351, "step": 7532 }, { "epoch": 0.2660791473316585, "grad_norm": 1.7656145095825195, "learning_rate": 8.608475156333883e-06, "loss": 0.8264, "step": 7533 }, { "epoch": 0.2661144691353664, "grad_norm": 1.8198156356811523, "learning_rate": 8.608079181103217e-06, "loss": 0.8339, "step": 7534 }, { "epoch": 0.2661497909390743, "grad_norm": 1.7067193984985352, "learning_rate": 8.60768315865069e-06, "loss": 0.8468, "step": 7535 }, { "epoch": 0.2661851127427822, "grad_norm": 1.7835168838500977, "learning_rate": 8.607287088981485e-06, "loss": 0.8266, "step": 7536 }, { "epoch": 0.26622043454649014, "grad_norm": 1.6314713954925537, "learning_rate": 8.60689097210078e-06, "loss": 0.8415, "step": 7537 }, { "epoch": 0.26625575635019805, "grad_norm": 1.7410651445388794, "learning_rate": 8.606494808013768e-06, "loss": 0.8121, "step": 7538 }, { "epoch": 0.2662910781539059, "grad_norm": 1.6012998819351196, "learning_rate": 8.60609859672563e-06, "loss": 0.818, "step": 7539 }, { "epoch": 0.2663263999576138, "grad_norm": 1.6734784841537476, "learning_rate": 8.60570233824155e-06, "loss": 0.821, "step": 7540 }, { "epoch": 0.2663617217613217, "grad_norm": 1.8700352907180786, "learning_rate": 8.605306032566716e-06, "loss": 0.8574, "step": 7541 }, { "epoch": 0.26639704356502963, "grad_norm": 1.6677733659744263, "learning_rate": 8.604909679706314e-06, "loss": 0.8445, "step": 7542 }, { "epoch": 0.26643236536873754, "grad_norm": 1.8304953575134277, "learning_rate": 8.604513279665532e-06, "loss": 0.8916, "step": 7543 }, { "epoch": 0.26646768717244546, "grad_norm": 1.8520716428756714, "learning_rate": 8.604116832449559e-06, "loss": 0.8571, "step": 7544 }, { "epoch": 0.26650300897615337, "grad_norm": 1.9567077159881592, "learning_rate": 8.603720338063578e-06, "loss": 0.8513, "step": 7545 }, { "epoch": 0.2665383307798613, "grad_norm": 1.6283683776855469, "learning_rate": 8.603323796512788e-06, "loss": 0.8883, "step": 7546 }, { "epoch": 0.2665736525835692, "grad_norm": 1.726918339729309, "learning_rate": 8.602927207802372e-06, "loss": 0.842, "step": 7547 }, { "epoch": 0.2666089743872771, "grad_norm": 1.7578526735305786, "learning_rate": 8.60253057193752e-06, "loss": 0.8127, "step": 7548 }, { "epoch": 0.266644296190985, "grad_norm": 2.059976100921631, "learning_rate": 8.602133888923428e-06, "loss": 0.8617, "step": 7549 }, { "epoch": 0.2666796179946929, "grad_norm": 1.6198008060455322, "learning_rate": 8.601737158765281e-06, "loss": 0.8383, "step": 7550 }, { "epoch": 0.26671493979840083, "grad_norm": 1.721030354499817, "learning_rate": 8.601340381468279e-06, "loss": 0.8323, "step": 7551 }, { "epoch": 0.2667502616021087, "grad_norm": 1.868989109992981, "learning_rate": 8.60094355703761e-06, "loss": 0.8334, "step": 7552 }, { "epoch": 0.2667855834058166, "grad_norm": 1.7511416673660278, "learning_rate": 8.600546685478468e-06, "loss": 0.8466, "step": 7553 }, { "epoch": 0.2668209052095245, "grad_norm": 1.5762288570404053, "learning_rate": 8.600149766796047e-06, "loss": 0.8413, "step": 7554 }, { "epoch": 0.2668562270132324, "grad_norm": 1.1864200830459595, "learning_rate": 8.599752800995542e-06, "loss": 0.6085, "step": 7555 }, { "epoch": 0.2668915488169403, "grad_norm": 1.8662197589874268, "learning_rate": 8.599355788082152e-06, "loss": 0.8243, "step": 7556 }, { "epoch": 0.26692687062064824, "grad_norm": 1.7554377317428589, "learning_rate": 8.598958728061068e-06, "loss": 0.838, "step": 7557 }, { "epoch": 0.26696219242435615, "grad_norm": 1.8339531421661377, "learning_rate": 8.59856162093749e-06, "loss": 0.8471, "step": 7558 }, { "epoch": 0.26699751422806406, "grad_norm": 1.6483184099197388, "learning_rate": 8.598164466716612e-06, "loss": 0.8622, "step": 7559 }, { "epoch": 0.26703283603177197, "grad_norm": 1.6530128717422485, "learning_rate": 8.597767265403636e-06, "loss": 0.7961, "step": 7560 }, { "epoch": 0.2670681578354799, "grad_norm": 1.7291003465652466, "learning_rate": 8.597370017003757e-06, "loss": 0.8283, "step": 7561 }, { "epoch": 0.2671034796391878, "grad_norm": 1.6880308389663696, "learning_rate": 8.596972721522176e-06, "loss": 0.8409, "step": 7562 }, { "epoch": 0.2671388014428957, "grad_norm": 1.9633044004440308, "learning_rate": 8.596575378964092e-06, "loss": 0.8437, "step": 7563 }, { "epoch": 0.2671741232466036, "grad_norm": 1.7706677913665771, "learning_rate": 8.596177989334706e-06, "loss": 0.8427, "step": 7564 }, { "epoch": 0.26720944505031147, "grad_norm": 1.8432809114456177, "learning_rate": 8.595780552639216e-06, "loss": 0.8582, "step": 7565 }, { "epoch": 0.2672447668540194, "grad_norm": 1.666662335395813, "learning_rate": 8.59538306888283e-06, "loss": 0.8362, "step": 7566 }, { "epoch": 0.2672800886577273, "grad_norm": 1.7132805585861206, "learning_rate": 8.594985538070743e-06, "loss": 0.8371, "step": 7567 }, { "epoch": 0.2673154104614352, "grad_norm": 1.748424768447876, "learning_rate": 8.594587960208162e-06, "loss": 0.8198, "step": 7568 }, { "epoch": 0.2673507322651431, "grad_norm": 1.817557454109192, "learning_rate": 8.59419033530029e-06, "loss": 0.8336, "step": 7569 }, { "epoch": 0.267386054068851, "grad_norm": 1.5823856592178345, "learning_rate": 8.593792663352332e-06, "loss": 0.8491, "step": 7570 }, { "epoch": 0.26742137587255893, "grad_norm": 1.563523530960083, "learning_rate": 8.59339494436949e-06, "loss": 0.8188, "step": 7571 }, { "epoch": 0.26745669767626684, "grad_norm": 1.9062756299972534, "learning_rate": 8.592997178356967e-06, "loss": 0.7949, "step": 7572 }, { "epoch": 0.26749201947997475, "grad_norm": 2.3874735832214355, "learning_rate": 8.592599365319978e-06, "loss": 0.8623, "step": 7573 }, { "epoch": 0.26752734128368266, "grad_norm": 1.799714207649231, "learning_rate": 8.59220150526372e-06, "loss": 0.8217, "step": 7574 }, { "epoch": 0.2675626630873906, "grad_norm": 1.6555078029632568, "learning_rate": 8.591803598193405e-06, "loss": 0.843, "step": 7575 }, { "epoch": 0.2675979848910985, "grad_norm": 1.5995023250579834, "learning_rate": 8.59140564411424e-06, "loss": 0.8424, "step": 7576 }, { "epoch": 0.2676333066948064, "grad_norm": 1.942696213722229, "learning_rate": 8.591007643031432e-06, "loss": 0.8311, "step": 7577 }, { "epoch": 0.26766862849851425, "grad_norm": 1.8088563680648804, "learning_rate": 8.590609594950192e-06, "loss": 0.8458, "step": 7578 }, { "epoch": 0.26770395030222216, "grad_norm": 1.6684324741363525, "learning_rate": 8.59021149987573e-06, "loss": 0.8484, "step": 7579 }, { "epoch": 0.26773927210593007, "grad_norm": 1.8410474061965942, "learning_rate": 8.589813357813252e-06, "loss": 0.8724, "step": 7580 }, { "epoch": 0.267774593909638, "grad_norm": 1.6334131956100464, "learning_rate": 8.589415168767975e-06, "loss": 0.8108, "step": 7581 }, { "epoch": 0.2678099157133459, "grad_norm": 1.6629544496536255, "learning_rate": 8.589016932745103e-06, "loss": 0.8539, "step": 7582 }, { "epoch": 0.2678452375170538, "grad_norm": 1.9278335571289062, "learning_rate": 8.588618649749853e-06, "loss": 0.8311, "step": 7583 }, { "epoch": 0.2678805593207617, "grad_norm": 1.7895225286483765, "learning_rate": 8.58822031978744e-06, "loss": 0.8731, "step": 7584 }, { "epoch": 0.2679158811244696, "grad_norm": 1.7408039569854736, "learning_rate": 8.58782194286307e-06, "loss": 0.8516, "step": 7585 }, { "epoch": 0.26795120292817753, "grad_norm": 1.721783995628357, "learning_rate": 8.587423518981964e-06, "loss": 0.8199, "step": 7586 }, { "epoch": 0.26798652473188544, "grad_norm": 1.9222530126571655, "learning_rate": 8.58702504814933e-06, "loss": 0.8346, "step": 7587 }, { "epoch": 0.26802184653559336, "grad_norm": 1.5824095010757446, "learning_rate": 8.586626530370388e-06, "loss": 0.8314, "step": 7588 }, { "epoch": 0.26805716833930127, "grad_norm": 1.6114393472671509, "learning_rate": 8.586227965650353e-06, "loss": 0.8352, "step": 7589 }, { "epoch": 0.2680924901430092, "grad_norm": 1.7633116245269775, "learning_rate": 8.58582935399444e-06, "loss": 0.8505, "step": 7590 }, { "epoch": 0.26812781194671703, "grad_norm": 1.8894586563110352, "learning_rate": 8.585430695407866e-06, "loss": 0.8295, "step": 7591 }, { "epoch": 0.26816313375042494, "grad_norm": 1.6124756336212158, "learning_rate": 8.585031989895848e-06, "loss": 0.8132, "step": 7592 }, { "epoch": 0.26819845555413285, "grad_norm": 1.6983007192611694, "learning_rate": 8.58463323746361e-06, "loss": 0.8347, "step": 7593 }, { "epoch": 0.26823377735784076, "grad_norm": 1.5904918909072876, "learning_rate": 8.584234438116362e-06, "loss": 0.7955, "step": 7594 }, { "epoch": 0.2682690991615487, "grad_norm": 1.573021650314331, "learning_rate": 8.583835591859327e-06, "loss": 0.8127, "step": 7595 }, { "epoch": 0.2683044209652566, "grad_norm": 1.7135496139526367, "learning_rate": 8.583436698697727e-06, "loss": 0.8566, "step": 7596 }, { "epoch": 0.2683397427689645, "grad_norm": 1.9024261236190796, "learning_rate": 8.58303775863678e-06, "loss": 0.8781, "step": 7597 }, { "epoch": 0.2683750645726724, "grad_norm": 1.8071523904800415, "learning_rate": 8.58263877168171e-06, "loss": 0.8601, "step": 7598 }, { "epoch": 0.2684103863763803, "grad_norm": 1.6183421611785889, "learning_rate": 8.582239737837736e-06, "loss": 0.8358, "step": 7599 }, { "epoch": 0.2684457081800882, "grad_norm": 1.730629563331604, "learning_rate": 8.58184065711008e-06, "loss": 0.8083, "step": 7600 }, { "epoch": 0.26848102998379614, "grad_norm": 1.814213514328003, "learning_rate": 8.58144152950397e-06, "loss": 0.8505, "step": 7601 }, { "epoch": 0.26851635178750405, "grad_norm": 1.809717059135437, "learning_rate": 8.581042355024625e-06, "loss": 0.8493, "step": 7602 }, { "epoch": 0.26855167359121196, "grad_norm": 1.75586998462677, "learning_rate": 8.58064313367727e-06, "loss": 0.8605, "step": 7603 }, { "epoch": 0.2685869953949198, "grad_norm": 1.7842448949813843, "learning_rate": 8.580243865467133e-06, "loss": 0.8463, "step": 7604 }, { "epoch": 0.2686223171986277, "grad_norm": 1.5158236026763916, "learning_rate": 8.579844550399437e-06, "loss": 0.8629, "step": 7605 }, { "epoch": 0.26865763900233564, "grad_norm": 1.9235856533050537, "learning_rate": 8.579445188479408e-06, "loss": 0.846, "step": 7606 }, { "epoch": 0.26869296080604355, "grad_norm": 1.7902027368545532, "learning_rate": 8.579045779712271e-06, "loss": 0.8124, "step": 7607 }, { "epoch": 0.26872828260975146, "grad_norm": 1.7475484609603882, "learning_rate": 8.578646324103258e-06, "loss": 0.863, "step": 7608 }, { "epoch": 0.26876360441345937, "grad_norm": 1.8165678977966309, "learning_rate": 8.578246821657595e-06, "loss": 0.8232, "step": 7609 }, { "epoch": 0.2687989262171673, "grad_norm": 1.5827776193618774, "learning_rate": 8.577847272380508e-06, "loss": 0.8251, "step": 7610 }, { "epoch": 0.2688342480208752, "grad_norm": 2.0924136638641357, "learning_rate": 8.57744767627723e-06, "loss": 0.8223, "step": 7611 }, { "epoch": 0.2688695698245831, "grad_norm": 1.8408564329147339, "learning_rate": 8.577048033352989e-06, "loss": 0.8565, "step": 7612 }, { "epoch": 0.268904891628291, "grad_norm": 1.814363956451416, "learning_rate": 8.576648343613016e-06, "loss": 0.8372, "step": 7613 }, { "epoch": 0.2689402134319989, "grad_norm": 1.755839467048645, "learning_rate": 8.57624860706254e-06, "loss": 0.8086, "step": 7614 }, { "epoch": 0.26897553523570683, "grad_norm": 1.908537745475769, "learning_rate": 8.575848823706796e-06, "loss": 0.8433, "step": 7615 }, { "epoch": 0.26901085703941474, "grad_norm": 1.7241358757019043, "learning_rate": 8.575448993551016e-06, "loss": 0.8557, "step": 7616 }, { "epoch": 0.2690461788431226, "grad_norm": 1.6058422327041626, "learning_rate": 8.575049116600432e-06, "loss": 0.8702, "step": 7617 }, { "epoch": 0.2690815006468305, "grad_norm": 1.823866367340088, "learning_rate": 8.574649192860274e-06, "loss": 0.8479, "step": 7618 }, { "epoch": 0.2691168224505384, "grad_norm": 1.6429576873779297, "learning_rate": 8.574249222335782e-06, "loss": 0.8629, "step": 7619 }, { "epoch": 0.26915214425424633, "grad_norm": 1.7192219495773315, "learning_rate": 8.573849205032187e-06, "loss": 0.8391, "step": 7620 }, { "epoch": 0.26918746605795424, "grad_norm": 2.096308946609497, "learning_rate": 8.573449140954726e-06, "loss": 0.8281, "step": 7621 }, { "epoch": 0.26922278786166215, "grad_norm": 1.7483946084976196, "learning_rate": 8.573049030108633e-06, "loss": 0.812, "step": 7622 }, { "epoch": 0.26925810966537006, "grad_norm": 1.7132102251052856, "learning_rate": 8.572648872499148e-06, "loss": 0.8754, "step": 7623 }, { "epoch": 0.26929343146907797, "grad_norm": 1.7351619005203247, "learning_rate": 8.572248668131505e-06, "loss": 0.831, "step": 7624 }, { "epoch": 0.2693287532727859, "grad_norm": 1.7610102891921997, "learning_rate": 8.571848417010942e-06, "loss": 0.85, "step": 7625 }, { "epoch": 0.2693640750764938, "grad_norm": 1.8451370000839233, "learning_rate": 8.571448119142701e-06, "loss": 0.8585, "step": 7626 }, { "epoch": 0.2693993968802017, "grad_norm": 2.9205687046051025, "learning_rate": 8.571047774532016e-06, "loss": 0.8284, "step": 7627 }, { "epoch": 0.2694347186839096, "grad_norm": 1.646491289138794, "learning_rate": 8.570647383184128e-06, "loss": 0.8484, "step": 7628 }, { "epoch": 0.2694700404876175, "grad_norm": 1.653563141822815, "learning_rate": 8.57024694510428e-06, "loss": 0.8284, "step": 7629 }, { "epoch": 0.2695053622913254, "grad_norm": 1.6396677494049072, "learning_rate": 8.569846460297712e-06, "loss": 0.8271, "step": 7630 }, { "epoch": 0.2695406840950333, "grad_norm": 1.7545323371887207, "learning_rate": 8.569445928769664e-06, "loss": 0.8325, "step": 7631 }, { "epoch": 0.2695760058987412, "grad_norm": 1.8159003257751465, "learning_rate": 8.569045350525378e-06, "loss": 0.8472, "step": 7632 }, { "epoch": 0.2696113277024491, "grad_norm": 2.100010633468628, "learning_rate": 8.568644725570096e-06, "loss": 0.8002, "step": 7633 }, { "epoch": 0.269646649506157, "grad_norm": 1.6428943872451782, "learning_rate": 8.568244053909065e-06, "loss": 0.8497, "step": 7634 }, { "epoch": 0.26968197130986493, "grad_norm": 1.816251516342163, "learning_rate": 8.567843335547527e-06, "loss": 0.837, "step": 7635 }, { "epoch": 0.26971729311357284, "grad_norm": 1.728033185005188, "learning_rate": 8.567442570490726e-06, "loss": 0.8046, "step": 7636 }, { "epoch": 0.26975261491728075, "grad_norm": 1.6351641416549683, "learning_rate": 8.567041758743906e-06, "loss": 0.8229, "step": 7637 }, { "epoch": 0.26978793672098866, "grad_norm": 1.6729704141616821, "learning_rate": 8.566640900312314e-06, "loss": 0.8629, "step": 7638 }, { "epoch": 0.2698232585246966, "grad_norm": 2.0488505363464355, "learning_rate": 8.566239995201195e-06, "loss": 0.821, "step": 7639 }, { "epoch": 0.2698585803284045, "grad_norm": 1.8149316310882568, "learning_rate": 8.5658390434158e-06, "loss": 0.8222, "step": 7640 }, { "epoch": 0.2698939021321124, "grad_norm": 1.7029935121536255, "learning_rate": 8.565438044961373e-06, "loss": 0.8554, "step": 7641 }, { "epoch": 0.2699292239358203, "grad_norm": 1.8285211324691772, "learning_rate": 8.565036999843163e-06, "loss": 0.8565, "step": 7642 }, { "epoch": 0.2699645457395282, "grad_norm": 1.9851919412612915, "learning_rate": 8.564635908066418e-06, "loss": 0.845, "step": 7643 }, { "epoch": 0.26999986754323607, "grad_norm": 1.9090523719787598, "learning_rate": 8.56423476963639e-06, "loss": 0.8612, "step": 7644 }, { "epoch": 0.270035189346944, "grad_norm": 1.7109761238098145, "learning_rate": 8.563833584558326e-06, "loss": 0.8334, "step": 7645 }, { "epoch": 0.2700705111506519, "grad_norm": 1.770906925201416, "learning_rate": 8.563432352837479e-06, "loss": 0.866, "step": 7646 }, { "epoch": 0.2701058329543598, "grad_norm": 1.6956208944320679, "learning_rate": 8.563031074479098e-06, "loss": 0.8319, "step": 7647 }, { "epoch": 0.2701411547580677, "grad_norm": 1.6385831832885742, "learning_rate": 8.562629749488438e-06, "loss": 0.83, "step": 7648 }, { "epoch": 0.2701764765617756, "grad_norm": 1.9061816930770874, "learning_rate": 8.562228377870746e-06, "loss": 0.8496, "step": 7649 }, { "epoch": 0.27021179836548354, "grad_norm": 1.6449425220489502, "learning_rate": 8.561826959631283e-06, "loss": 0.8514, "step": 7650 }, { "epoch": 0.27024712016919145, "grad_norm": 1.6235146522521973, "learning_rate": 8.561425494775295e-06, "loss": 0.8189, "step": 7651 }, { "epoch": 0.27028244197289936, "grad_norm": 1.0621956586837769, "learning_rate": 8.56102398330804e-06, "loss": 0.5609, "step": 7652 }, { "epoch": 0.27031776377660727, "grad_norm": 1.7323638200759888, "learning_rate": 8.560622425234774e-06, "loss": 0.8477, "step": 7653 }, { "epoch": 0.2703530855803152, "grad_norm": 1.6440989971160889, "learning_rate": 8.560220820560751e-06, "loss": 0.8053, "step": 7654 }, { "epoch": 0.2703884073840231, "grad_norm": 1.7164546251296997, "learning_rate": 8.559819169291226e-06, "loss": 0.8681, "step": 7655 }, { "epoch": 0.270423729187731, "grad_norm": 1.7302651405334473, "learning_rate": 8.559417471431458e-06, "loss": 0.8369, "step": 7656 }, { "epoch": 0.27045905099143885, "grad_norm": 1.729460597038269, "learning_rate": 8.559015726986703e-06, "loss": 0.8442, "step": 7657 }, { "epoch": 0.27049437279514676, "grad_norm": 1.729910969734192, "learning_rate": 8.558613935962218e-06, "loss": 0.8289, "step": 7658 }, { "epoch": 0.2705296945988547, "grad_norm": 1.878300666809082, "learning_rate": 8.558212098363263e-06, "loss": 0.851, "step": 7659 }, { "epoch": 0.2705650164025626, "grad_norm": 1.6212081909179688, "learning_rate": 8.557810214195098e-06, "loss": 0.8232, "step": 7660 }, { "epoch": 0.2706003382062705, "grad_norm": 1.7383366823196411, "learning_rate": 8.557408283462982e-06, "loss": 0.822, "step": 7661 }, { "epoch": 0.2706356600099784, "grad_norm": 1.8978691101074219, "learning_rate": 8.557006306172172e-06, "loss": 0.876, "step": 7662 }, { "epoch": 0.2706709818136863, "grad_norm": 1.7575688362121582, "learning_rate": 8.556604282327936e-06, "loss": 0.8718, "step": 7663 }, { "epoch": 0.27070630361739423, "grad_norm": 1.7132046222686768, "learning_rate": 8.556202211935531e-06, "loss": 0.8414, "step": 7664 }, { "epoch": 0.27074162542110214, "grad_norm": 1.8945868015289307, "learning_rate": 8.555800095000219e-06, "loss": 0.8549, "step": 7665 }, { "epoch": 0.27077694722481005, "grad_norm": 1.7748825550079346, "learning_rate": 8.555397931527263e-06, "loss": 0.8592, "step": 7666 }, { "epoch": 0.27081226902851796, "grad_norm": 1.6281574964523315, "learning_rate": 8.554995721521928e-06, "loss": 0.8471, "step": 7667 }, { "epoch": 0.27084759083222587, "grad_norm": 1.7908114194869995, "learning_rate": 8.554593464989479e-06, "loss": 0.8303, "step": 7668 }, { "epoch": 0.2708829126359338, "grad_norm": 1.6684081554412842, "learning_rate": 8.554191161935177e-06, "loss": 0.8625, "step": 7669 }, { "epoch": 0.27091823443964164, "grad_norm": 1.70578932762146, "learning_rate": 8.553788812364288e-06, "loss": 0.8459, "step": 7670 }, { "epoch": 0.27095355624334955, "grad_norm": 1.7776628732681274, "learning_rate": 8.55338641628208e-06, "loss": 0.8797, "step": 7671 }, { "epoch": 0.27098887804705746, "grad_norm": 1.8625646829605103, "learning_rate": 8.55298397369382e-06, "loss": 0.8635, "step": 7672 }, { "epoch": 0.27102419985076537, "grad_norm": 1.9837180376052856, "learning_rate": 8.552581484604772e-06, "loss": 0.8476, "step": 7673 }, { "epoch": 0.2710595216544733, "grad_norm": 2.186875343322754, "learning_rate": 8.552178949020207e-06, "loss": 0.8421, "step": 7674 }, { "epoch": 0.2710948434581812, "grad_norm": 1.7504584789276123, "learning_rate": 8.55177636694539e-06, "loss": 0.8412, "step": 7675 }, { "epoch": 0.2711301652618891, "grad_norm": 1.8616812229156494, "learning_rate": 8.551373738385592e-06, "loss": 0.8534, "step": 7676 }, { "epoch": 0.271165487065597, "grad_norm": 1.5612109899520874, "learning_rate": 8.550971063346083e-06, "loss": 0.828, "step": 7677 }, { "epoch": 0.2712008088693049, "grad_norm": 1.856327772140503, "learning_rate": 8.550568341832131e-06, "loss": 0.8101, "step": 7678 }, { "epoch": 0.27123613067301283, "grad_norm": 1.517882227897644, "learning_rate": 8.55016557384901e-06, "loss": 0.8079, "step": 7679 }, { "epoch": 0.27127145247672074, "grad_norm": 1.5614690780639648, "learning_rate": 8.549762759401987e-06, "loss": 0.7962, "step": 7680 }, { "epoch": 0.27130677428042865, "grad_norm": 1.706100344657898, "learning_rate": 8.549359898496337e-06, "loss": 0.8081, "step": 7681 }, { "epoch": 0.27134209608413656, "grad_norm": 1.6761112213134766, "learning_rate": 8.548956991137333e-06, "loss": 0.8072, "step": 7682 }, { "epoch": 0.2713774178878444, "grad_norm": 1.8370529413223267, "learning_rate": 8.548554037330245e-06, "loss": 0.872, "step": 7683 }, { "epoch": 0.27141273969155233, "grad_norm": 1.539778232574463, "learning_rate": 8.54815103708035e-06, "loss": 0.8407, "step": 7684 }, { "epoch": 0.27144806149526024, "grad_norm": 1.7211086750030518, "learning_rate": 8.547747990392921e-06, "loss": 0.8646, "step": 7685 }, { "epoch": 0.27148338329896815, "grad_norm": 1.5179023742675781, "learning_rate": 8.547344897273233e-06, "loss": 0.8036, "step": 7686 }, { "epoch": 0.27151870510267606, "grad_norm": 1.677051305770874, "learning_rate": 8.546941757726564e-06, "loss": 0.807, "step": 7687 }, { "epoch": 0.27155402690638397, "grad_norm": 1.7166935205459595, "learning_rate": 8.546538571758186e-06, "loss": 0.8221, "step": 7688 }, { "epoch": 0.2715893487100919, "grad_norm": 2.5419692993164062, "learning_rate": 8.546135339373378e-06, "loss": 0.7869, "step": 7689 }, { "epoch": 0.2716246705137998, "grad_norm": 1.6068592071533203, "learning_rate": 8.545732060577416e-06, "loss": 0.8696, "step": 7690 }, { "epoch": 0.2716599923175077, "grad_norm": 1.6612921953201294, "learning_rate": 8.54532873537558e-06, "loss": 0.8152, "step": 7691 }, { "epoch": 0.2716953141212156, "grad_norm": 1.685510277748108, "learning_rate": 8.54492536377315e-06, "loss": 0.8271, "step": 7692 }, { "epoch": 0.2717306359249235, "grad_norm": 1.8905303478240967, "learning_rate": 8.5445219457754e-06, "loss": 0.8319, "step": 7693 }, { "epoch": 0.27176595772863144, "grad_norm": 1.6516937017440796, "learning_rate": 8.544118481387615e-06, "loss": 0.8341, "step": 7694 }, { "epoch": 0.27180127953233935, "grad_norm": 1.7808239459991455, "learning_rate": 8.543714970615074e-06, "loss": 0.8411, "step": 7695 }, { "epoch": 0.2718366013360472, "grad_norm": 1.7242683172225952, "learning_rate": 8.543311413463057e-06, "loss": 0.8242, "step": 7696 }, { "epoch": 0.2718719231397551, "grad_norm": 1.7984386682510376, "learning_rate": 8.542907809936846e-06, "loss": 0.8352, "step": 7697 }, { "epoch": 0.271907244943463, "grad_norm": 1.7292637825012207, "learning_rate": 8.542504160041725e-06, "loss": 0.872, "step": 7698 }, { "epoch": 0.27194256674717093, "grad_norm": 1.6511088609695435, "learning_rate": 8.542100463782972e-06, "loss": 0.8419, "step": 7699 }, { "epoch": 0.27197788855087884, "grad_norm": 1.9270281791687012, "learning_rate": 8.541696721165878e-06, "loss": 0.8248, "step": 7700 }, { "epoch": 0.27201321035458675, "grad_norm": 1.7379602193832397, "learning_rate": 8.54129293219572e-06, "loss": 0.8211, "step": 7701 }, { "epoch": 0.27204853215829466, "grad_norm": 1.584855079650879, "learning_rate": 8.540889096877789e-06, "loss": 0.8156, "step": 7702 }, { "epoch": 0.2720838539620026, "grad_norm": 1.815344214439392, "learning_rate": 8.540485215217365e-06, "loss": 0.8464, "step": 7703 }, { "epoch": 0.2721191757657105, "grad_norm": 1.7452248334884644, "learning_rate": 8.540081287219736e-06, "loss": 0.8324, "step": 7704 }, { "epoch": 0.2721544975694184, "grad_norm": 1.5300743579864502, "learning_rate": 8.539677312890189e-06, "loss": 0.8178, "step": 7705 }, { "epoch": 0.2721898193731263, "grad_norm": 1.556362271308899, "learning_rate": 8.53927329223401e-06, "loss": 0.8424, "step": 7706 }, { "epoch": 0.2722251411768342, "grad_norm": 1.8978058099746704, "learning_rate": 8.538869225256487e-06, "loss": 0.8104, "step": 7707 }, { "epoch": 0.27226046298054213, "grad_norm": 1.666046142578125, "learning_rate": 8.538465111962911e-06, "loss": 0.8346, "step": 7708 }, { "epoch": 0.27229578478425, "grad_norm": 1.7497532367706299, "learning_rate": 8.538060952358568e-06, "loss": 0.8256, "step": 7709 }, { "epoch": 0.2723311065879579, "grad_norm": 1.9413210153579712, "learning_rate": 8.537656746448748e-06, "loss": 0.8848, "step": 7710 }, { "epoch": 0.2723664283916658, "grad_norm": 1.8092589378356934, "learning_rate": 8.537252494238742e-06, "loss": 0.8286, "step": 7711 }, { "epoch": 0.2724017501953737, "grad_norm": 1.7867894172668457, "learning_rate": 8.536848195733838e-06, "loss": 0.8663, "step": 7712 }, { "epoch": 0.2724370719990816, "grad_norm": 1.6318199634552002, "learning_rate": 8.536443850939332e-06, "loss": 0.856, "step": 7713 }, { "epoch": 0.27247239380278954, "grad_norm": 1.8075480461120605, "learning_rate": 8.536039459860514e-06, "loss": 0.8535, "step": 7714 }, { "epoch": 0.27250771560649745, "grad_norm": 2.364410638809204, "learning_rate": 8.535635022502675e-06, "loss": 0.8258, "step": 7715 }, { "epoch": 0.27254303741020536, "grad_norm": 1.6289300918579102, "learning_rate": 8.53523053887111e-06, "loss": 0.8626, "step": 7716 }, { "epoch": 0.27257835921391327, "grad_norm": 1.7432218790054321, "learning_rate": 8.534826008971112e-06, "loss": 0.8111, "step": 7717 }, { "epoch": 0.2726136810176212, "grad_norm": 1.6825449466705322, "learning_rate": 8.534421432807976e-06, "loss": 0.8477, "step": 7718 }, { "epoch": 0.2726490028213291, "grad_norm": 1.737165927886963, "learning_rate": 8.534016810386996e-06, "loss": 0.8503, "step": 7719 }, { "epoch": 0.272684324625037, "grad_norm": 2.23726487159729, "learning_rate": 8.533612141713468e-06, "loss": 0.8353, "step": 7720 }, { "epoch": 0.2727196464287449, "grad_norm": 1.631518006324768, "learning_rate": 8.53320742679269e-06, "loss": 0.824, "step": 7721 }, { "epoch": 0.27275496823245277, "grad_norm": 1.7317296266555786, "learning_rate": 8.532802665629955e-06, "loss": 0.8558, "step": 7722 }, { "epoch": 0.2727902900361607, "grad_norm": 1.8555889129638672, "learning_rate": 8.532397858230565e-06, "loss": 0.8538, "step": 7723 }, { "epoch": 0.2728256118398686, "grad_norm": 1.753090262413025, "learning_rate": 8.531993004599816e-06, "loss": 0.8456, "step": 7724 }, { "epoch": 0.2728609336435765, "grad_norm": 1.7306933403015137, "learning_rate": 8.531588104743005e-06, "loss": 0.866, "step": 7725 }, { "epoch": 0.2728962554472844, "grad_norm": 1.5948034524917603, "learning_rate": 8.531183158665435e-06, "loss": 0.8133, "step": 7726 }, { "epoch": 0.2729315772509923, "grad_norm": 1.8686885833740234, "learning_rate": 8.530778166372402e-06, "loss": 0.8614, "step": 7727 }, { "epoch": 0.27296689905470023, "grad_norm": 1.6637383699417114, "learning_rate": 8.530373127869207e-06, "loss": 0.8607, "step": 7728 }, { "epoch": 0.27300222085840814, "grad_norm": 1.6057456731796265, "learning_rate": 8.529968043161154e-06, "loss": 0.8393, "step": 7729 }, { "epoch": 0.27303754266211605, "grad_norm": 1.6784241199493408, "learning_rate": 8.529562912253542e-06, "loss": 0.8488, "step": 7730 }, { "epoch": 0.27307286446582396, "grad_norm": 1.6396678686141968, "learning_rate": 8.529157735151674e-06, "loss": 0.8299, "step": 7731 }, { "epoch": 0.27310818626953187, "grad_norm": 1.7750389575958252, "learning_rate": 8.528752511860854e-06, "loss": 0.8314, "step": 7732 }, { "epoch": 0.2731435080732398, "grad_norm": 1.7504026889801025, "learning_rate": 8.528347242386384e-06, "loss": 0.8267, "step": 7733 }, { "epoch": 0.2731788298769477, "grad_norm": 1.5385814905166626, "learning_rate": 8.527941926733568e-06, "loss": 0.8221, "step": 7734 }, { "epoch": 0.27321415168065555, "grad_norm": 1.760867953300476, "learning_rate": 8.527536564907712e-06, "loss": 0.8486, "step": 7735 }, { "epoch": 0.27324947348436346, "grad_norm": 1.7036181688308716, "learning_rate": 8.527131156914119e-06, "loss": 0.8954, "step": 7736 }, { "epoch": 0.27328479528807137, "grad_norm": 1.8377037048339844, "learning_rate": 8.526725702758097e-06, "loss": 0.8122, "step": 7737 }, { "epoch": 0.2733201170917793, "grad_norm": 1.8277937173843384, "learning_rate": 8.526320202444953e-06, "loss": 0.8309, "step": 7738 }, { "epoch": 0.2733554388954872, "grad_norm": 1.5526750087738037, "learning_rate": 8.525914655979993e-06, "loss": 0.8697, "step": 7739 }, { "epoch": 0.2733907606991951, "grad_norm": 1.8404057025909424, "learning_rate": 8.525509063368525e-06, "loss": 0.8191, "step": 7740 }, { "epoch": 0.273426082502903, "grad_norm": 2.037411689758301, "learning_rate": 8.525103424615855e-06, "loss": 0.864, "step": 7741 }, { "epoch": 0.2734614043066109, "grad_norm": 1.5579216480255127, "learning_rate": 8.524697739727295e-06, "loss": 0.8154, "step": 7742 }, { "epoch": 0.27349672611031883, "grad_norm": 1.9812320470809937, "learning_rate": 8.524292008708155e-06, "loss": 0.8326, "step": 7743 }, { "epoch": 0.27353204791402674, "grad_norm": 1.9518693685531616, "learning_rate": 8.523886231563742e-06, "loss": 0.8361, "step": 7744 }, { "epoch": 0.27356736971773465, "grad_norm": 1.7217963933944702, "learning_rate": 8.52348040829937e-06, "loss": 0.8452, "step": 7745 }, { "epoch": 0.27360269152144256, "grad_norm": 2.024732828140259, "learning_rate": 8.523074538920346e-06, "loss": 0.8182, "step": 7746 }, { "epoch": 0.2736380133251505, "grad_norm": 1.6843067407608032, "learning_rate": 8.522668623431987e-06, "loss": 0.8343, "step": 7747 }, { "epoch": 0.27367333512885833, "grad_norm": 1.5817995071411133, "learning_rate": 8.522262661839602e-06, "loss": 0.8016, "step": 7748 }, { "epoch": 0.27370865693256624, "grad_norm": 1.6421433687210083, "learning_rate": 8.521856654148507e-06, "loss": 0.853, "step": 7749 }, { "epoch": 0.27374397873627415, "grad_norm": 1.903346300125122, "learning_rate": 8.521450600364013e-06, "loss": 0.8339, "step": 7750 }, { "epoch": 0.27377930053998206, "grad_norm": 1.9036121368408203, "learning_rate": 8.521044500491435e-06, "loss": 0.7892, "step": 7751 }, { "epoch": 0.27381462234369, "grad_norm": 1.622685194015503, "learning_rate": 8.52063835453609e-06, "loss": 0.8334, "step": 7752 }, { "epoch": 0.2738499441473979, "grad_norm": 1.7268496751785278, "learning_rate": 8.520232162503292e-06, "loss": 0.8601, "step": 7753 }, { "epoch": 0.2738852659511058, "grad_norm": 2.072133779525757, "learning_rate": 8.519825924398358e-06, "loss": 0.8916, "step": 7754 }, { "epoch": 0.2739205877548137, "grad_norm": 1.6469882726669312, "learning_rate": 8.519419640226601e-06, "loss": 0.8332, "step": 7755 }, { "epoch": 0.2739559095585216, "grad_norm": 2.101372718811035, "learning_rate": 8.519013309993342e-06, "loss": 0.8419, "step": 7756 }, { "epoch": 0.2739912313622295, "grad_norm": 1.9739996194839478, "learning_rate": 8.5186069337039e-06, "loss": 0.8317, "step": 7757 }, { "epoch": 0.27402655316593744, "grad_norm": 1.6876888275146484, "learning_rate": 8.51820051136359e-06, "loss": 0.8222, "step": 7758 }, { "epoch": 0.27406187496964535, "grad_norm": 1.6924532651901245, "learning_rate": 8.517794042977734e-06, "loss": 0.8636, "step": 7759 }, { "epoch": 0.27409719677335326, "grad_norm": 1.7553892135620117, "learning_rate": 8.517387528551651e-06, "loss": 0.8255, "step": 7760 }, { "epoch": 0.2741325185770611, "grad_norm": 1.6882874965667725, "learning_rate": 8.51698096809066e-06, "loss": 0.8201, "step": 7761 }, { "epoch": 0.274167840380769, "grad_norm": 1.624058723449707, "learning_rate": 8.516574361600086e-06, "loss": 0.8233, "step": 7762 }, { "epoch": 0.27420316218447693, "grad_norm": 1.8469215631484985, "learning_rate": 8.516167709085246e-06, "loss": 0.8345, "step": 7763 }, { "epoch": 0.27423848398818484, "grad_norm": 1.8282103538513184, "learning_rate": 8.515761010551465e-06, "loss": 0.8467, "step": 7764 }, { "epoch": 0.27427380579189276, "grad_norm": 1.828845739364624, "learning_rate": 8.515354266004064e-06, "loss": 0.8158, "step": 7765 }, { "epoch": 0.27430912759560067, "grad_norm": 1.5942145586013794, "learning_rate": 8.514947475448366e-06, "loss": 0.8137, "step": 7766 }, { "epoch": 0.2743444493993086, "grad_norm": 1.659360647201538, "learning_rate": 8.514540638889697e-06, "loss": 0.8455, "step": 7767 }, { "epoch": 0.2743797712030165, "grad_norm": 1.793605089187622, "learning_rate": 8.51413375633338e-06, "loss": 0.8739, "step": 7768 }, { "epoch": 0.2744150930067244, "grad_norm": 1.80021333694458, "learning_rate": 8.513726827784742e-06, "loss": 0.8606, "step": 7769 }, { "epoch": 0.2744504148104323, "grad_norm": 1.6249371767044067, "learning_rate": 8.51331985324911e-06, "loss": 0.8371, "step": 7770 }, { "epoch": 0.2744857366141402, "grad_norm": 1.7530118227005005, "learning_rate": 8.512912832731806e-06, "loss": 0.8376, "step": 7771 }, { "epoch": 0.27452105841784813, "grad_norm": 1.6964131593704224, "learning_rate": 8.512505766238158e-06, "loss": 0.8311, "step": 7772 }, { "epoch": 0.27455638022155604, "grad_norm": 1.7485268115997314, "learning_rate": 8.512098653773496e-06, "loss": 0.8652, "step": 7773 }, { "epoch": 0.2745917020252639, "grad_norm": 1.923867106437683, "learning_rate": 8.511691495343147e-06, "loss": 0.8204, "step": 7774 }, { "epoch": 0.2746270238289718, "grad_norm": 1.7491955757141113, "learning_rate": 8.51128429095244e-06, "loss": 0.8657, "step": 7775 }, { "epoch": 0.2746623456326797, "grad_norm": 1.9191174507141113, "learning_rate": 8.510877040606702e-06, "loss": 0.84, "step": 7776 }, { "epoch": 0.2746976674363876, "grad_norm": 1.6173187494277954, "learning_rate": 8.510469744311269e-06, "loss": 0.8504, "step": 7777 }, { "epoch": 0.27473298924009554, "grad_norm": 1.5856071710586548, "learning_rate": 8.510062402071466e-06, "loss": 0.805, "step": 7778 }, { "epoch": 0.27476831104380345, "grad_norm": 1.9795423746109009, "learning_rate": 8.509655013892626e-06, "loss": 0.8498, "step": 7779 }, { "epoch": 0.27480363284751136, "grad_norm": 1.675598382949829, "learning_rate": 8.509247579780081e-06, "loss": 0.805, "step": 7780 }, { "epoch": 0.27483895465121927, "grad_norm": 0.926827609539032, "learning_rate": 8.508840099739165e-06, "loss": 0.5978, "step": 7781 }, { "epoch": 0.2748742764549272, "grad_norm": 1.73210871219635, "learning_rate": 8.508432573775208e-06, "loss": 0.8703, "step": 7782 }, { "epoch": 0.2749095982586351, "grad_norm": 2.06272029876709, "learning_rate": 8.508025001893544e-06, "loss": 0.8651, "step": 7783 }, { "epoch": 0.274944920062343, "grad_norm": 1.6279433965682983, "learning_rate": 8.507617384099509e-06, "loss": 0.8491, "step": 7784 }, { "epoch": 0.2749802418660509, "grad_norm": 1.5274380445480347, "learning_rate": 8.507209720398437e-06, "loss": 0.8375, "step": 7785 }, { "epoch": 0.2750155636697588, "grad_norm": 1.668071985244751, "learning_rate": 8.506802010795664e-06, "loss": 0.8093, "step": 7786 }, { "epoch": 0.2750508854734667, "grad_norm": 1.7076308727264404, "learning_rate": 8.506394255296524e-06, "loss": 0.8208, "step": 7787 }, { "epoch": 0.2750862072771746, "grad_norm": 2.256025552749634, "learning_rate": 8.505986453906357e-06, "loss": 0.8529, "step": 7788 }, { "epoch": 0.2751215290808825, "grad_norm": 1.8066318035125732, "learning_rate": 8.505578606630497e-06, "loss": 0.8651, "step": 7789 }, { "epoch": 0.2751568508845904, "grad_norm": 1.52605402469635, "learning_rate": 8.505170713474284e-06, "loss": 0.8123, "step": 7790 }, { "epoch": 0.2751921726882983, "grad_norm": 1.7527258396148682, "learning_rate": 8.504762774443057e-06, "loss": 0.7973, "step": 7791 }, { "epoch": 0.27522749449200623, "grad_norm": 1.7783620357513428, "learning_rate": 8.504354789542152e-06, "loss": 0.8243, "step": 7792 }, { "epoch": 0.27526281629571414, "grad_norm": 1.7529442310333252, "learning_rate": 8.50394675877691e-06, "loss": 0.8307, "step": 7793 }, { "epoch": 0.27529813809942205, "grad_norm": 1.4341261386871338, "learning_rate": 8.503538682152671e-06, "loss": 0.8215, "step": 7794 }, { "epoch": 0.27533345990312996, "grad_norm": 1.6443370580673218, "learning_rate": 8.503130559674777e-06, "loss": 0.8601, "step": 7795 }, { "epoch": 0.2753687817068379, "grad_norm": 1.6478112936019897, "learning_rate": 8.502722391348567e-06, "loss": 0.8348, "step": 7796 }, { "epoch": 0.2754041035105458, "grad_norm": 1.837496042251587, "learning_rate": 8.502314177179387e-06, "loss": 0.8347, "step": 7797 }, { "epoch": 0.2754394253142537, "grad_norm": 1.897277593612671, "learning_rate": 8.501905917172579e-06, "loss": 0.869, "step": 7798 }, { "epoch": 0.2754747471179616, "grad_norm": 1.5880358219146729, "learning_rate": 8.501497611333482e-06, "loss": 0.8601, "step": 7799 }, { "epoch": 0.27551006892166946, "grad_norm": 1.7267868518829346, "learning_rate": 8.501089259667444e-06, "loss": 0.8149, "step": 7800 }, { "epoch": 0.27554539072537737, "grad_norm": 1.7425661087036133, "learning_rate": 8.500680862179807e-06, "loss": 0.8285, "step": 7801 }, { "epoch": 0.2755807125290853, "grad_norm": 1.6799392700195312, "learning_rate": 8.500272418875916e-06, "loss": 0.8453, "step": 7802 }, { "epoch": 0.2756160343327932, "grad_norm": 1.6930240392684937, "learning_rate": 8.499863929761119e-06, "loss": 0.8307, "step": 7803 }, { "epoch": 0.2756513561365011, "grad_norm": 1.6261979341506958, "learning_rate": 8.49945539484076e-06, "loss": 0.8688, "step": 7804 }, { "epoch": 0.275686677940209, "grad_norm": 1.6270629167556763, "learning_rate": 8.499046814120186e-06, "loss": 0.8199, "step": 7805 }, { "epoch": 0.2757219997439169, "grad_norm": 1.6645643711090088, "learning_rate": 8.498638187604746e-06, "loss": 0.8099, "step": 7806 }, { "epoch": 0.27575732154762483, "grad_norm": 1.537484049797058, "learning_rate": 8.498229515299787e-06, "loss": 0.8105, "step": 7807 }, { "epoch": 0.27579264335133274, "grad_norm": 1.0543200969696045, "learning_rate": 8.497820797210659e-06, "loss": 0.6062, "step": 7808 }, { "epoch": 0.27582796515504066, "grad_norm": 1.6248505115509033, "learning_rate": 8.497412033342709e-06, "loss": 0.8413, "step": 7809 }, { "epoch": 0.27586328695874857, "grad_norm": 1.6654815673828125, "learning_rate": 8.497003223701286e-06, "loss": 0.8576, "step": 7810 }, { "epoch": 0.2758986087624565, "grad_norm": 1.5220556259155273, "learning_rate": 8.496594368291744e-06, "loss": 0.8081, "step": 7811 }, { "epoch": 0.2759339305661644, "grad_norm": 1.5564842224121094, "learning_rate": 8.496185467119432e-06, "loss": 0.8539, "step": 7812 }, { "epoch": 0.27596925236987224, "grad_norm": 1.621877908706665, "learning_rate": 8.495776520189701e-06, "loss": 0.8497, "step": 7813 }, { "epoch": 0.27600457417358015, "grad_norm": 1.5025551319122314, "learning_rate": 8.495367527507904e-06, "loss": 0.8542, "step": 7814 }, { "epoch": 0.27603989597728806, "grad_norm": 1.672391414642334, "learning_rate": 8.494958489079396e-06, "loss": 0.8376, "step": 7815 }, { "epoch": 0.276075217780996, "grad_norm": 2.2270796298980713, "learning_rate": 8.494549404909527e-06, "loss": 0.8266, "step": 7816 }, { "epoch": 0.2761105395847039, "grad_norm": 1.5838340520858765, "learning_rate": 8.494140275003651e-06, "loss": 0.815, "step": 7817 }, { "epoch": 0.2761458613884118, "grad_norm": 1.5837697982788086, "learning_rate": 8.493731099367127e-06, "loss": 0.8618, "step": 7818 }, { "epoch": 0.2761811831921197, "grad_norm": 1.659677267074585, "learning_rate": 8.493321878005303e-06, "loss": 0.8733, "step": 7819 }, { "epoch": 0.2762165049958276, "grad_norm": 1.648471474647522, "learning_rate": 8.492912610923542e-06, "loss": 0.8112, "step": 7820 }, { "epoch": 0.2762518267995355, "grad_norm": 1.6236692667007446, "learning_rate": 8.492503298127197e-06, "loss": 0.8062, "step": 7821 }, { "epoch": 0.27628714860324344, "grad_norm": 1.615179419517517, "learning_rate": 8.492093939621624e-06, "loss": 0.8123, "step": 7822 }, { "epoch": 0.27632247040695135, "grad_norm": 1.2772397994995117, "learning_rate": 8.491684535412182e-06, "loss": 0.6105, "step": 7823 }, { "epoch": 0.27635779221065926, "grad_norm": 1.6068060398101807, "learning_rate": 8.49127508550423e-06, "loss": 0.8371, "step": 7824 }, { "epoch": 0.27639311401436717, "grad_norm": 1.8118480443954468, "learning_rate": 8.490865589903126e-06, "loss": 0.8738, "step": 7825 }, { "epoch": 0.276428435818075, "grad_norm": 1.5729864835739136, "learning_rate": 8.490456048614229e-06, "loss": 0.8327, "step": 7826 }, { "epoch": 0.27646375762178294, "grad_norm": 1.5506336688995361, "learning_rate": 8.4900464616429e-06, "loss": 0.8375, "step": 7827 }, { "epoch": 0.27649907942549085, "grad_norm": 1.7322266101837158, "learning_rate": 8.489636828994499e-06, "loss": 0.8591, "step": 7828 }, { "epoch": 0.27653440122919876, "grad_norm": 1.6624782085418701, "learning_rate": 8.489227150674387e-06, "loss": 0.8386, "step": 7829 }, { "epoch": 0.27656972303290667, "grad_norm": 1.5752688646316528, "learning_rate": 8.488817426687925e-06, "loss": 0.862, "step": 7830 }, { "epoch": 0.2766050448366146, "grad_norm": 1.831816554069519, "learning_rate": 8.488407657040476e-06, "loss": 0.7955, "step": 7831 }, { "epoch": 0.2766403666403225, "grad_norm": 1.6033034324645996, "learning_rate": 8.487997841737405e-06, "loss": 0.8592, "step": 7832 }, { "epoch": 0.2766756884440304, "grad_norm": 1.6377439498901367, "learning_rate": 8.487587980784074e-06, "loss": 0.8401, "step": 7833 }, { "epoch": 0.2767110102477383, "grad_norm": 1.588959813117981, "learning_rate": 8.487178074185845e-06, "loss": 0.8076, "step": 7834 }, { "epoch": 0.2767463320514462, "grad_norm": 1.6251918077468872, "learning_rate": 8.486768121948086e-06, "loss": 0.8373, "step": 7835 }, { "epoch": 0.27678165385515413, "grad_norm": 1.5579440593719482, "learning_rate": 8.486358124076162e-06, "loss": 0.83, "step": 7836 }, { "epoch": 0.27681697565886204, "grad_norm": 1.6766263246536255, "learning_rate": 8.48594808057544e-06, "loss": 0.8275, "step": 7837 }, { "epoch": 0.27685229746256995, "grad_norm": 1.4943264722824097, "learning_rate": 8.48553799145128e-06, "loss": 0.8023, "step": 7838 }, { "epoch": 0.2768876192662778, "grad_norm": 1.5990371704101562, "learning_rate": 8.485127856709057e-06, "loss": 0.8422, "step": 7839 }, { "epoch": 0.2769229410699857, "grad_norm": 1.748255968093872, "learning_rate": 8.484717676354135e-06, "loss": 0.8018, "step": 7840 }, { "epoch": 0.27695826287369363, "grad_norm": 1.8994026184082031, "learning_rate": 8.484307450391884e-06, "loss": 0.8472, "step": 7841 }, { "epoch": 0.27699358467740154, "grad_norm": 1.5241163969039917, "learning_rate": 8.483897178827671e-06, "loss": 0.8271, "step": 7842 }, { "epoch": 0.27702890648110945, "grad_norm": 1.5831514596939087, "learning_rate": 8.483486861666867e-06, "loss": 0.8175, "step": 7843 }, { "epoch": 0.27706422828481736, "grad_norm": 1.5273621082305908, "learning_rate": 8.483076498914842e-06, "loss": 0.8072, "step": 7844 }, { "epoch": 0.27709955008852527, "grad_norm": 1.8467888832092285, "learning_rate": 8.482666090576968e-06, "loss": 0.8577, "step": 7845 }, { "epoch": 0.2771348718922332, "grad_norm": 1.7275598049163818, "learning_rate": 8.482255636658612e-06, "loss": 0.8226, "step": 7846 }, { "epoch": 0.2771701936959411, "grad_norm": 1.6994918584823608, "learning_rate": 8.48184513716515e-06, "loss": 0.8601, "step": 7847 }, { "epoch": 0.277205515499649, "grad_norm": 1.0925312042236328, "learning_rate": 8.481434592101954e-06, "loss": 0.6252, "step": 7848 }, { "epoch": 0.2772408373033569, "grad_norm": 1.7656062841415405, "learning_rate": 8.481024001474394e-06, "loss": 0.841, "step": 7849 }, { "epoch": 0.2772761591070648, "grad_norm": 1.8139938116073608, "learning_rate": 8.480613365287848e-06, "loss": 0.8315, "step": 7850 }, { "epoch": 0.27731148091077273, "grad_norm": 1.772505521774292, "learning_rate": 8.480202683547688e-06, "loss": 0.8786, "step": 7851 }, { "epoch": 0.2773468027144806, "grad_norm": 1.687881588935852, "learning_rate": 8.479791956259291e-06, "loss": 0.8523, "step": 7852 }, { "epoch": 0.2773821245181885, "grad_norm": 1.5791521072387695, "learning_rate": 8.479381183428031e-06, "loss": 0.8245, "step": 7853 }, { "epoch": 0.2774174463218964, "grad_norm": 1.691298246383667, "learning_rate": 8.478970365059283e-06, "loss": 0.8148, "step": 7854 }, { "epoch": 0.2774527681256043, "grad_norm": 1.7469284534454346, "learning_rate": 8.478559501158426e-06, "loss": 0.8608, "step": 7855 }, { "epoch": 0.27748808992931223, "grad_norm": 1.6498279571533203, "learning_rate": 8.478148591730834e-06, "loss": 0.8369, "step": 7856 }, { "epoch": 0.27752341173302014, "grad_norm": 1.648195505142212, "learning_rate": 8.47773763678189e-06, "loss": 0.8258, "step": 7857 }, { "epoch": 0.27755873353672805, "grad_norm": 1.84922456741333, "learning_rate": 8.477326636316966e-06, "loss": 0.8465, "step": 7858 }, { "epoch": 0.27759405534043596, "grad_norm": 1.7151654958724976, "learning_rate": 8.476915590341448e-06, "loss": 0.8424, "step": 7859 }, { "epoch": 0.2776293771441439, "grad_norm": 1.6485166549682617, "learning_rate": 8.47650449886071e-06, "loss": 0.8687, "step": 7860 }, { "epoch": 0.2776646989478518, "grad_norm": 1.6605477333068848, "learning_rate": 8.476093361880135e-06, "loss": 0.8095, "step": 7861 }, { "epoch": 0.2777000207515597, "grad_norm": 1.7638846635818481, "learning_rate": 8.475682179405105e-06, "loss": 0.8517, "step": 7862 }, { "epoch": 0.2777353425552676, "grad_norm": 1.7898668050765991, "learning_rate": 8.475270951441e-06, "loss": 0.8553, "step": 7863 }, { "epoch": 0.2777706643589755, "grad_norm": 1.8908133506774902, "learning_rate": 8.4748596779932e-06, "loss": 0.8621, "step": 7864 }, { "epoch": 0.27780598616268337, "grad_norm": 0.9797611236572266, "learning_rate": 8.474448359067091e-06, "loss": 0.634, "step": 7865 }, { "epoch": 0.2778413079663913, "grad_norm": 1.6873774528503418, "learning_rate": 8.474036994668056e-06, "loss": 0.8603, "step": 7866 }, { "epoch": 0.2778766297700992, "grad_norm": 1.7868274450302124, "learning_rate": 8.473625584801475e-06, "loss": 0.8356, "step": 7867 }, { "epoch": 0.2779119515738071, "grad_norm": 1.7334636449813843, "learning_rate": 8.473214129472737e-06, "loss": 0.8599, "step": 7868 }, { "epoch": 0.277947273377515, "grad_norm": 0.9280667901039124, "learning_rate": 8.472802628687227e-06, "loss": 0.596, "step": 7869 }, { "epoch": 0.2779825951812229, "grad_norm": 1.5710058212280273, "learning_rate": 8.472391082450327e-06, "loss": 0.8207, "step": 7870 }, { "epoch": 0.27801791698493084, "grad_norm": 1.9808276891708374, "learning_rate": 8.471979490767424e-06, "loss": 0.8355, "step": 7871 }, { "epoch": 0.27805323878863875, "grad_norm": 1.6376389265060425, "learning_rate": 8.471567853643909e-06, "loss": 0.8102, "step": 7872 }, { "epoch": 0.27808856059234666, "grad_norm": 1.660536527633667, "learning_rate": 8.471156171085166e-06, "loss": 0.8424, "step": 7873 }, { "epoch": 0.27812388239605457, "grad_norm": 1.6473227739334106, "learning_rate": 8.470744443096583e-06, "loss": 0.8145, "step": 7874 }, { "epoch": 0.2781592041997625, "grad_norm": 1.674072027206421, "learning_rate": 8.470332669683547e-06, "loss": 0.8324, "step": 7875 }, { "epoch": 0.2781945260034704, "grad_norm": 1.519297480583191, "learning_rate": 8.469920850851451e-06, "loss": 0.808, "step": 7876 }, { "epoch": 0.2782298478071783, "grad_norm": 1.7471548318862915, "learning_rate": 8.469508986605684e-06, "loss": 0.8721, "step": 7877 }, { "epoch": 0.27826516961088615, "grad_norm": 1.6914973258972168, "learning_rate": 8.469097076951636e-06, "loss": 0.8255, "step": 7878 }, { "epoch": 0.27830049141459406, "grad_norm": 1.759317398071289, "learning_rate": 8.468685121894695e-06, "loss": 0.8332, "step": 7879 }, { "epoch": 0.278335813218302, "grad_norm": 1.6737473011016846, "learning_rate": 8.468273121440258e-06, "loss": 0.8616, "step": 7880 }, { "epoch": 0.2783711350220099, "grad_norm": 1.7780094146728516, "learning_rate": 8.467861075593714e-06, "loss": 0.836, "step": 7881 }, { "epoch": 0.2784064568257178, "grad_norm": 1.6091681718826294, "learning_rate": 8.467448984360455e-06, "loss": 0.8355, "step": 7882 }, { "epoch": 0.2784417786294257, "grad_norm": 1.7107073068618774, "learning_rate": 8.467036847745875e-06, "loss": 0.8224, "step": 7883 }, { "epoch": 0.2784771004331336, "grad_norm": 1.630326509475708, "learning_rate": 8.466624665755369e-06, "loss": 0.8266, "step": 7884 }, { "epoch": 0.27851242223684153, "grad_norm": 1.5646969079971313, "learning_rate": 8.466212438394333e-06, "loss": 0.8407, "step": 7885 }, { "epoch": 0.27854774404054944, "grad_norm": 1.9208556413650513, "learning_rate": 8.465800165668158e-06, "loss": 0.8599, "step": 7886 }, { "epoch": 0.27858306584425735, "grad_norm": 1.5285074710845947, "learning_rate": 8.465387847582241e-06, "loss": 0.7985, "step": 7887 }, { "epoch": 0.27861838764796526, "grad_norm": 1.645801305770874, "learning_rate": 8.464975484141982e-06, "loss": 0.8338, "step": 7888 }, { "epoch": 0.27865370945167317, "grad_norm": 1.6415181159973145, "learning_rate": 8.464563075352773e-06, "loss": 0.8234, "step": 7889 }, { "epoch": 0.2786890312553811, "grad_norm": 1.6134055852890015, "learning_rate": 8.464150621220016e-06, "loss": 0.8746, "step": 7890 }, { "epoch": 0.27872435305908894, "grad_norm": 1.5974185466766357, "learning_rate": 8.463738121749106e-06, "loss": 0.8251, "step": 7891 }, { "epoch": 0.27875967486279685, "grad_norm": 1.6503229141235352, "learning_rate": 8.46332557694544e-06, "loss": 0.8341, "step": 7892 }, { "epoch": 0.27879499666650476, "grad_norm": 1.500667929649353, "learning_rate": 8.462912986814422e-06, "loss": 0.8605, "step": 7893 }, { "epoch": 0.27883031847021267, "grad_norm": 1.928884744644165, "learning_rate": 8.46250035136145e-06, "loss": 0.8265, "step": 7894 }, { "epoch": 0.2788656402739206, "grad_norm": 1.7084921598434448, "learning_rate": 8.462087670591925e-06, "loss": 0.7856, "step": 7895 }, { "epoch": 0.2789009620776285, "grad_norm": 1.567520260810852, "learning_rate": 8.461674944511245e-06, "loss": 0.8219, "step": 7896 }, { "epoch": 0.2789362838813364, "grad_norm": 1.7487834692001343, "learning_rate": 8.461262173124816e-06, "loss": 0.8469, "step": 7897 }, { "epoch": 0.2789716056850443, "grad_norm": 1.6988036632537842, "learning_rate": 8.460849356438037e-06, "loss": 0.8335, "step": 7898 }, { "epoch": 0.2790069274887522, "grad_norm": 1.7555537223815918, "learning_rate": 8.460436494456313e-06, "loss": 0.7972, "step": 7899 }, { "epoch": 0.27904224929246013, "grad_norm": 1.568861484527588, "learning_rate": 8.460023587185047e-06, "loss": 0.8105, "step": 7900 }, { "epoch": 0.27907757109616804, "grad_norm": 1.6847734451293945, "learning_rate": 8.459610634629642e-06, "loss": 0.852, "step": 7901 }, { "epoch": 0.27911289289987595, "grad_norm": 1.7022937536239624, "learning_rate": 8.459197636795504e-06, "loss": 0.8356, "step": 7902 }, { "epoch": 0.27914821470358386, "grad_norm": 1.5568389892578125, "learning_rate": 8.458784593688038e-06, "loss": 0.8268, "step": 7903 }, { "epoch": 0.2791835365072917, "grad_norm": 1.8156265020370483, "learning_rate": 8.458371505312649e-06, "loss": 0.8678, "step": 7904 }, { "epoch": 0.27921885831099963, "grad_norm": 1.8004380464553833, "learning_rate": 8.457958371674741e-06, "loss": 0.8553, "step": 7905 }, { "epoch": 0.27925418011470754, "grad_norm": 1.5506083965301514, "learning_rate": 8.457545192779728e-06, "loss": 0.8147, "step": 7906 }, { "epoch": 0.27928950191841545, "grad_norm": 1.5521498918533325, "learning_rate": 8.457131968633013e-06, "loss": 0.8134, "step": 7907 }, { "epoch": 0.27932482372212336, "grad_norm": 1.5754896402359009, "learning_rate": 8.456718699240002e-06, "loss": 0.8167, "step": 7908 }, { "epoch": 0.27936014552583127, "grad_norm": 1.7458237409591675, "learning_rate": 8.456305384606107e-06, "loss": 0.8685, "step": 7909 }, { "epoch": 0.2793954673295392, "grad_norm": 1.6072371006011963, "learning_rate": 8.455892024736738e-06, "loss": 0.8469, "step": 7910 }, { "epoch": 0.2794307891332471, "grad_norm": 1.4340015649795532, "learning_rate": 8.455478619637304e-06, "loss": 0.8254, "step": 7911 }, { "epoch": 0.279466110936955, "grad_norm": 1.632442831993103, "learning_rate": 8.455065169313215e-06, "loss": 0.8568, "step": 7912 }, { "epoch": 0.2795014327406629, "grad_norm": 1.6802167892456055, "learning_rate": 8.454651673769883e-06, "loss": 0.8459, "step": 7913 }, { "epoch": 0.2795367545443708, "grad_norm": 1.6721211671829224, "learning_rate": 8.454238133012718e-06, "loss": 0.861, "step": 7914 }, { "epoch": 0.27957207634807874, "grad_norm": 1.6121009588241577, "learning_rate": 8.453824547047136e-06, "loss": 0.842, "step": 7915 }, { "epoch": 0.27960739815178665, "grad_norm": 1.8601949214935303, "learning_rate": 8.453410915878545e-06, "loss": 0.8184, "step": 7916 }, { "epoch": 0.2796427199554945, "grad_norm": 1.9050817489624023, "learning_rate": 8.452997239512361e-06, "loss": 0.874, "step": 7917 }, { "epoch": 0.2796780417592024, "grad_norm": 1.5278849601745605, "learning_rate": 8.452583517954001e-06, "loss": 0.8026, "step": 7918 }, { "epoch": 0.2797133635629103, "grad_norm": 1.6622531414031982, "learning_rate": 8.452169751208876e-06, "loss": 0.838, "step": 7919 }, { "epoch": 0.27974868536661823, "grad_norm": 1.5837167501449585, "learning_rate": 8.4517559392824e-06, "loss": 0.8145, "step": 7920 }, { "epoch": 0.27978400717032614, "grad_norm": 1.7955487966537476, "learning_rate": 8.451342082179994e-06, "loss": 0.8701, "step": 7921 }, { "epoch": 0.27981932897403405, "grad_norm": 1.6340237855911255, "learning_rate": 8.45092817990707e-06, "loss": 0.8387, "step": 7922 }, { "epoch": 0.27985465077774196, "grad_norm": 1.6709253787994385, "learning_rate": 8.450514232469046e-06, "loss": 0.837, "step": 7923 }, { "epoch": 0.2798899725814499, "grad_norm": 1.6962217092514038, "learning_rate": 8.450100239871342e-06, "loss": 0.8354, "step": 7924 }, { "epoch": 0.2799252943851578, "grad_norm": 1.7029204368591309, "learning_rate": 8.449686202119374e-06, "loss": 0.8254, "step": 7925 }, { "epoch": 0.2799606161888657, "grad_norm": 1.6433196067810059, "learning_rate": 8.449272119218562e-06, "loss": 0.8387, "step": 7926 }, { "epoch": 0.2799959379925736, "grad_norm": 1.6787134408950806, "learning_rate": 8.448857991174323e-06, "loss": 0.8764, "step": 7927 }, { "epoch": 0.2800312597962815, "grad_norm": 1.8950345516204834, "learning_rate": 8.44844381799208e-06, "loss": 0.8041, "step": 7928 }, { "epoch": 0.28006658159998943, "grad_norm": 2.0083446502685547, "learning_rate": 8.448029599677253e-06, "loss": 0.8339, "step": 7929 }, { "epoch": 0.2801019034036973, "grad_norm": 1.5455341339111328, "learning_rate": 8.447615336235263e-06, "loss": 0.8345, "step": 7930 }, { "epoch": 0.2801372252074052, "grad_norm": 1.790663480758667, "learning_rate": 8.44720102767153e-06, "loss": 0.8906, "step": 7931 }, { "epoch": 0.2801725470111131, "grad_norm": 1.568250060081482, "learning_rate": 8.446786673991479e-06, "loss": 0.8455, "step": 7932 }, { "epoch": 0.280207868814821, "grad_norm": 1.9209072589874268, "learning_rate": 8.446372275200531e-06, "loss": 0.8393, "step": 7933 }, { "epoch": 0.2802431906185289, "grad_norm": 2.0812997817993164, "learning_rate": 8.445957831304112e-06, "loss": 0.846, "step": 7934 }, { "epoch": 0.28027851242223684, "grad_norm": 1.799822211265564, "learning_rate": 8.445543342307646e-06, "loss": 0.8201, "step": 7935 }, { "epoch": 0.28031383422594475, "grad_norm": 1.8261882066726685, "learning_rate": 8.445128808216554e-06, "loss": 0.8241, "step": 7936 }, { "epoch": 0.28034915602965266, "grad_norm": 1.893288493156433, "learning_rate": 8.444714229036265e-06, "loss": 0.8015, "step": 7937 }, { "epoch": 0.28038447783336057, "grad_norm": 1.6438798904418945, "learning_rate": 8.444299604772202e-06, "loss": 0.8096, "step": 7938 }, { "epoch": 0.2804197996370685, "grad_norm": 1.8170781135559082, "learning_rate": 8.443884935429794e-06, "loss": 0.8366, "step": 7939 }, { "epoch": 0.2804551214407764, "grad_norm": 1.6550201177597046, "learning_rate": 8.443470221014466e-06, "loss": 0.8167, "step": 7940 }, { "epoch": 0.2804904432444843, "grad_norm": 1.7682856321334839, "learning_rate": 8.44305546153165e-06, "loss": 0.7992, "step": 7941 }, { "epoch": 0.2805257650481922, "grad_norm": 1.6131353378295898, "learning_rate": 8.442640656986771e-06, "loss": 0.8394, "step": 7942 }, { "epoch": 0.28056108685190007, "grad_norm": 1.8261818885803223, "learning_rate": 8.442225807385256e-06, "loss": 0.8379, "step": 7943 }, { "epoch": 0.280596408655608, "grad_norm": 1.823915719985962, "learning_rate": 8.441810912732538e-06, "loss": 0.8325, "step": 7944 }, { "epoch": 0.2806317304593159, "grad_norm": 1.7008826732635498, "learning_rate": 8.441395973034046e-06, "loss": 0.8246, "step": 7945 }, { "epoch": 0.2806670522630238, "grad_norm": 1.695588231086731, "learning_rate": 8.44098098829521e-06, "loss": 0.844, "step": 7946 }, { "epoch": 0.2807023740667317, "grad_norm": 1.868533968925476, "learning_rate": 8.440565958521463e-06, "loss": 0.8295, "step": 7947 }, { "epoch": 0.2807376958704396, "grad_norm": 1.6028847694396973, "learning_rate": 8.440150883718233e-06, "loss": 0.825, "step": 7948 }, { "epoch": 0.28077301767414753, "grad_norm": 1.697018027305603, "learning_rate": 8.439735763890957e-06, "loss": 0.8069, "step": 7949 }, { "epoch": 0.28080833947785544, "grad_norm": 1.8034809827804565, "learning_rate": 8.439320599045065e-06, "loss": 0.8483, "step": 7950 }, { "epoch": 0.28084366128156335, "grad_norm": 1.7422411441802979, "learning_rate": 8.438905389185991e-06, "loss": 0.826, "step": 7951 }, { "epoch": 0.28087898308527126, "grad_norm": 1.7075567245483398, "learning_rate": 8.438490134319171e-06, "loss": 0.8208, "step": 7952 }, { "epoch": 0.28091430488897917, "grad_norm": 1.7573500871658325, "learning_rate": 8.438074834450036e-06, "loss": 0.8692, "step": 7953 }, { "epoch": 0.2809496266926871, "grad_norm": 1.7280070781707764, "learning_rate": 8.437659489584027e-06, "loss": 0.8531, "step": 7954 }, { "epoch": 0.280984948496395, "grad_norm": 1.6488308906555176, "learning_rate": 8.437244099726574e-06, "loss": 0.8255, "step": 7955 }, { "epoch": 0.28102027030010285, "grad_norm": 1.7046490907669067, "learning_rate": 8.436828664883118e-06, "loss": 0.827, "step": 7956 }, { "epoch": 0.28105559210381076, "grad_norm": 1.5814688205718994, "learning_rate": 8.436413185059096e-06, "loss": 0.8029, "step": 7957 }, { "epoch": 0.28109091390751867, "grad_norm": 1.5807868242263794, "learning_rate": 8.435997660259943e-06, "loss": 0.8279, "step": 7958 }, { "epoch": 0.2811262357112266, "grad_norm": 1.5494747161865234, "learning_rate": 8.435582090491096e-06, "loss": 0.8531, "step": 7959 }, { "epoch": 0.2811615575149345, "grad_norm": 1.7622579336166382, "learning_rate": 8.435166475757999e-06, "loss": 0.8187, "step": 7960 }, { "epoch": 0.2811968793186424, "grad_norm": 1.864162564277649, "learning_rate": 8.434750816066088e-06, "loss": 0.8595, "step": 7961 }, { "epoch": 0.2812322011223503, "grad_norm": 1.9688400030136108, "learning_rate": 8.434335111420805e-06, "loss": 0.8667, "step": 7962 }, { "epoch": 0.2812675229260582, "grad_norm": 1.6268898248672485, "learning_rate": 8.433919361827589e-06, "loss": 0.8329, "step": 7963 }, { "epoch": 0.28130284472976613, "grad_norm": 1.5820708274841309, "learning_rate": 8.433503567291881e-06, "loss": 0.8208, "step": 7964 }, { "epoch": 0.28133816653347404, "grad_norm": 1.7511941194534302, "learning_rate": 8.433087727819125e-06, "loss": 0.8733, "step": 7965 }, { "epoch": 0.28137348833718195, "grad_norm": 0.9991807341575623, "learning_rate": 8.432671843414762e-06, "loss": 0.5492, "step": 7966 }, { "epoch": 0.28140881014088986, "grad_norm": 1.7026437520980835, "learning_rate": 8.432255914084235e-06, "loss": 0.8557, "step": 7967 }, { "epoch": 0.2814441319445978, "grad_norm": 1.8020435571670532, "learning_rate": 8.431839939832989e-06, "loss": 0.8398, "step": 7968 }, { "epoch": 0.28147945374830563, "grad_norm": 1.7500137090682983, "learning_rate": 8.431423920666463e-06, "loss": 0.8485, "step": 7969 }, { "epoch": 0.28151477555201354, "grad_norm": 1.54103422164917, "learning_rate": 8.43100785659011e-06, "loss": 0.8156, "step": 7970 }, { "epoch": 0.28155009735572145, "grad_norm": 1.6315066814422607, "learning_rate": 8.430591747609369e-06, "loss": 0.864, "step": 7971 }, { "epoch": 0.28158541915942936, "grad_norm": 1.7185075283050537, "learning_rate": 8.430175593729688e-06, "loss": 0.7868, "step": 7972 }, { "epoch": 0.2816207409631373, "grad_norm": 1.7021026611328125, "learning_rate": 8.429759394956516e-06, "loss": 0.8208, "step": 7973 }, { "epoch": 0.2816560627668452, "grad_norm": 1.6654266119003296, "learning_rate": 8.429343151295295e-06, "loss": 0.8419, "step": 7974 }, { "epoch": 0.2816913845705531, "grad_norm": 1.6516464948654175, "learning_rate": 8.42892686275148e-06, "loss": 0.8624, "step": 7975 }, { "epoch": 0.281726706374261, "grad_norm": 1.5830276012420654, "learning_rate": 8.428510529330509e-06, "loss": 0.8355, "step": 7976 }, { "epoch": 0.2817620281779689, "grad_norm": 1.5171259641647339, "learning_rate": 8.428094151037838e-06, "loss": 0.8325, "step": 7977 }, { "epoch": 0.2817973499816768, "grad_norm": 1.685083270072937, "learning_rate": 8.427677727878918e-06, "loss": 0.8054, "step": 7978 }, { "epoch": 0.28183267178538474, "grad_norm": 1.6517975330352783, "learning_rate": 8.427261259859195e-06, "loss": 0.8296, "step": 7979 }, { "epoch": 0.28186799358909265, "grad_norm": 1.7335395812988281, "learning_rate": 8.426844746984118e-06, "loss": 0.7967, "step": 7980 }, { "epoch": 0.28190331539280056, "grad_norm": 2.1217479705810547, "learning_rate": 8.426428189259144e-06, "loss": 0.8367, "step": 7981 }, { "epoch": 0.2819386371965084, "grad_norm": 1.7816429138183594, "learning_rate": 8.426011586689723e-06, "loss": 0.8488, "step": 7982 }, { "epoch": 0.2819739590002163, "grad_norm": 2.4741179943084717, "learning_rate": 8.425594939281303e-06, "loss": 0.8413, "step": 7983 }, { "epoch": 0.28200928080392423, "grad_norm": 1.1161189079284668, "learning_rate": 8.425178247039343e-06, "loss": 0.6037, "step": 7984 }, { "epoch": 0.28204460260763214, "grad_norm": 1.981648325920105, "learning_rate": 8.424761509969292e-06, "loss": 0.8185, "step": 7985 }, { "epoch": 0.28207992441134006, "grad_norm": 1.785756230354309, "learning_rate": 8.424344728076608e-06, "loss": 0.806, "step": 7986 }, { "epoch": 0.28211524621504797, "grad_norm": 1.629023790359497, "learning_rate": 8.423927901366744e-06, "loss": 0.8551, "step": 7987 }, { "epoch": 0.2821505680187559, "grad_norm": 1.6213918924331665, "learning_rate": 8.423511029845153e-06, "loss": 0.819, "step": 7988 }, { "epoch": 0.2821858898224638, "grad_norm": 1.9094511270523071, "learning_rate": 8.423094113517297e-06, "loss": 0.8613, "step": 7989 }, { "epoch": 0.2822212116261717, "grad_norm": 1.628199815750122, "learning_rate": 8.422677152388625e-06, "loss": 0.8527, "step": 7990 }, { "epoch": 0.2822565334298796, "grad_norm": 1.607699990272522, "learning_rate": 8.4222601464646e-06, "loss": 0.8223, "step": 7991 }, { "epoch": 0.2822918552335875, "grad_norm": 1.6980901956558228, "learning_rate": 8.421843095750675e-06, "loss": 0.8459, "step": 7992 }, { "epoch": 0.28232717703729543, "grad_norm": 1.6188331842422485, "learning_rate": 8.421426000252312e-06, "loss": 0.82, "step": 7993 }, { "epoch": 0.28236249884100334, "grad_norm": 1.8476719856262207, "learning_rate": 8.42100885997497e-06, "loss": 0.823, "step": 7994 }, { "epoch": 0.2823978206447112, "grad_norm": 0.9134775400161743, "learning_rate": 8.420591674924108e-06, "loss": 0.5823, "step": 7995 }, { "epoch": 0.2824331424484191, "grad_norm": 2.154933452606201, "learning_rate": 8.420174445105182e-06, "loss": 0.8782, "step": 7996 }, { "epoch": 0.282468464252127, "grad_norm": 1.8361419439315796, "learning_rate": 8.419757170523657e-06, "loss": 0.8419, "step": 7997 }, { "epoch": 0.2825037860558349, "grad_norm": 1.816607117652893, "learning_rate": 8.419339851184994e-06, "loss": 0.8704, "step": 7998 }, { "epoch": 0.28253910785954284, "grad_norm": 1.626856803894043, "learning_rate": 8.418922487094653e-06, "loss": 0.8314, "step": 7999 }, { "epoch": 0.28257442966325075, "grad_norm": 1.842097282409668, "learning_rate": 8.418505078258097e-06, "loss": 0.8734, "step": 8000 }, { "epoch": 0.28260975146695866, "grad_norm": 1.7658119201660156, "learning_rate": 8.418087624680789e-06, "loss": 0.8601, "step": 8001 }, { "epoch": 0.28264507327066657, "grad_norm": 1.7859684228897095, "learning_rate": 8.417670126368194e-06, "loss": 0.8473, "step": 8002 }, { "epoch": 0.2826803950743745, "grad_norm": 1.6546576023101807, "learning_rate": 8.417252583325773e-06, "loss": 0.7742, "step": 8003 }, { "epoch": 0.2827157168780824, "grad_norm": 1.6804929971694946, "learning_rate": 8.416834995558994e-06, "loss": 0.8641, "step": 8004 }, { "epoch": 0.2827510386817903, "grad_norm": 1.6788803339004517, "learning_rate": 8.41641736307332e-06, "loss": 0.826, "step": 8005 }, { "epoch": 0.2827863604854982, "grad_norm": 1.564240574836731, "learning_rate": 8.415999685874218e-06, "loss": 0.8488, "step": 8006 }, { "epoch": 0.2828216822892061, "grad_norm": 1.7981752157211304, "learning_rate": 8.415581963967154e-06, "loss": 0.8508, "step": 8007 }, { "epoch": 0.282857004092914, "grad_norm": 1.5517799854278564, "learning_rate": 8.415164197357596e-06, "loss": 0.8485, "step": 8008 }, { "epoch": 0.2828923258966219, "grad_norm": 1.660535454750061, "learning_rate": 8.41474638605101e-06, "loss": 0.8604, "step": 8009 }, { "epoch": 0.2829276477003298, "grad_norm": 1.633059024810791, "learning_rate": 8.414328530052866e-06, "loss": 0.8199, "step": 8010 }, { "epoch": 0.2829629695040377, "grad_norm": 1.8191540241241455, "learning_rate": 8.413910629368632e-06, "loss": 0.8034, "step": 8011 }, { "epoch": 0.2829982913077456, "grad_norm": 1.8765804767608643, "learning_rate": 8.413492684003777e-06, "loss": 0.8617, "step": 8012 }, { "epoch": 0.28303361311145353, "grad_norm": 1.6225981712341309, "learning_rate": 8.413074693963771e-06, "loss": 0.8152, "step": 8013 }, { "epoch": 0.28306893491516144, "grad_norm": 1.8304275274276733, "learning_rate": 8.412656659254087e-06, "loss": 0.8139, "step": 8014 }, { "epoch": 0.28310425671886935, "grad_norm": 1.7224715948104858, "learning_rate": 8.412238579880193e-06, "loss": 0.8624, "step": 8015 }, { "epoch": 0.28313957852257726, "grad_norm": 1.8639347553253174, "learning_rate": 8.411820455847561e-06, "loss": 0.846, "step": 8016 }, { "epoch": 0.2831749003262852, "grad_norm": 1.8756648302078247, "learning_rate": 8.411402287161665e-06, "loss": 0.7955, "step": 8017 }, { "epoch": 0.2832102221299931, "grad_norm": 1.8383396863937378, "learning_rate": 8.410984073827977e-06, "loss": 0.8427, "step": 8018 }, { "epoch": 0.283245543933701, "grad_norm": 1.7452590465545654, "learning_rate": 8.410565815851972e-06, "loss": 0.8606, "step": 8019 }, { "epoch": 0.2832808657374089, "grad_norm": 1.7552930116653442, "learning_rate": 8.410147513239123e-06, "loss": 0.877, "step": 8020 }, { "epoch": 0.28331618754111676, "grad_norm": 2.5760233402252197, "learning_rate": 8.409729165994901e-06, "loss": 0.8422, "step": 8021 }, { "epoch": 0.28335150934482467, "grad_norm": 1.5772291421890259, "learning_rate": 8.409310774124786e-06, "loss": 0.8185, "step": 8022 }, { "epoch": 0.2833868311485326, "grad_norm": 6.090249538421631, "learning_rate": 8.408892337634253e-06, "loss": 0.8165, "step": 8023 }, { "epoch": 0.2834221529522405, "grad_norm": 1.756601095199585, "learning_rate": 8.408473856528778e-06, "loss": 0.8005, "step": 8024 }, { "epoch": 0.2834574747559484, "grad_norm": 1.8843733072280884, "learning_rate": 8.408055330813837e-06, "loss": 0.8197, "step": 8025 }, { "epoch": 0.2834927965596563, "grad_norm": 1.7412149906158447, "learning_rate": 8.407636760494909e-06, "loss": 0.8122, "step": 8026 }, { "epoch": 0.2835281183633642, "grad_norm": 1.72439706325531, "learning_rate": 8.407218145577473e-06, "loss": 0.8383, "step": 8027 }, { "epoch": 0.28356344016707213, "grad_norm": 1.6829637289047241, "learning_rate": 8.406799486067005e-06, "loss": 0.8097, "step": 8028 }, { "epoch": 0.28359876197078004, "grad_norm": 1.7302660942077637, "learning_rate": 8.406380781968986e-06, "loss": 0.8469, "step": 8029 }, { "epoch": 0.28363408377448796, "grad_norm": 1.6930948495864868, "learning_rate": 8.405962033288894e-06, "loss": 0.8393, "step": 8030 }, { "epoch": 0.28366940557819587, "grad_norm": 1.5916893482208252, "learning_rate": 8.405543240032214e-06, "loss": 0.8202, "step": 8031 }, { "epoch": 0.2837047273819038, "grad_norm": 1.570523977279663, "learning_rate": 8.405124402204423e-06, "loss": 0.8179, "step": 8032 }, { "epoch": 0.2837400491856117, "grad_norm": 1.6412650346755981, "learning_rate": 8.404705519811004e-06, "loss": 0.8556, "step": 8033 }, { "epoch": 0.28377537098931954, "grad_norm": 1.62590491771698, "learning_rate": 8.404286592857439e-06, "loss": 0.8147, "step": 8034 }, { "epoch": 0.28381069279302745, "grad_norm": 1.6908503770828247, "learning_rate": 8.403867621349212e-06, "loss": 0.8296, "step": 8035 }, { "epoch": 0.28384601459673536, "grad_norm": 1.5956628322601318, "learning_rate": 8.403448605291804e-06, "loss": 0.81, "step": 8036 }, { "epoch": 0.2838813364004433, "grad_norm": 1.7139619588851929, "learning_rate": 8.403029544690701e-06, "loss": 0.8225, "step": 8037 }, { "epoch": 0.2839166582041512, "grad_norm": 1.6728307008743286, "learning_rate": 8.402610439551386e-06, "loss": 0.8535, "step": 8038 }, { "epoch": 0.2839519800078591, "grad_norm": 2.021315574645996, "learning_rate": 8.402191289879346e-06, "loss": 0.8707, "step": 8039 }, { "epoch": 0.283987301811567, "grad_norm": 1.859628677368164, "learning_rate": 8.401772095680067e-06, "loss": 0.8223, "step": 8040 }, { "epoch": 0.2840226236152749, "grad_norm": 1.7074439525604248, "learning_rate": 8.401352856959033e-06, "loss": 0.8552, "step": 8041 }, { "epoch": 0.2840579454189828, "grad_norm": 1.6113834381103516, "learning_rate": 8.400933573721732e-06, "loss": 0.806, "step": 8042 }, { "epoch": 0.28409326722269074, "grad_norm": 1.7713360786437988, "learning_rate": 8.400514245973654e-06, "loss": 0.8451, "step": 8043 }, { "epoch": 0.28412858902639865, "grad_norm": 2.002882957458496, "learning_rate": 8.400094873720282e-06, "loss": 0.8262, "step": 8044 }, { "epoch": 0.28416391083010656, "grad_norm": 1.7720519304275513, "learning_rate": 8.39967545696711e-06, "loss": 0.8358, "step": 8045 }, { "epoch": 0.28419923263381447, "grad_norm": 1.6038109064102173, "learning_rate": 8.399255995719621e-06, "loss": 0.8115, "step": 8046 }, { "epoch": 0.2842345544375224, "grad_norm": 1.0073362588882446, "learning_rate": 8.39883648998331e-06, "loss": 0.5788, "step": 8047 }, { "epoch": 0.28426987624123023, "grad_norm": 1.6764872074127197, "learning_rate": 8.398416939763668e-06, "loss": 0.8133, "step": 8048 }, { "epoch": 0.28430519804493815, "grad_norm": 1.6587624549865723, "learning_rate": 8.397997345066183e-06, "loss": 0.8695, "step": 8049 }, { "epoch": 0.28434051984864606, "grad_norm": 1.6288772821426392, "learning_rate": 8.397577705896348e-06, "loss": 0.8275, "step": 8050 }, { "epoch": 0.28437584165235397, "grad_norm": 1.8887778520584106, "learning_rate": 8.397158022259653e-06, "loss": 0.819, "step": 8051 }, { "epoch": 0.2844111634560619, "grad_norm": 1.6514314413070679, "learning_rate": 8.396738294161593e-06, "loss": 0.8193, "step": 8052 }, { "epoch": 0.2844464852597698, "grad_norm": 1.780125379562378, "learning_rate": 8.39631852160766e-06, "loss": 0.8475, "step": 8053 }, { "epoch": 0.2844818070634777, "grad_norm": 1.7137736082077026, "learning_rate": 8.39589870460335e-06, "loss": 0.8272, "step": 8054 }, { "epoch": 0.2845171288671856, "grad_norm": 0.8557800054550171, "learning_rate": 8.395478843154156e-06, "loss": 0.5708, "step": 8055 }, { "epoch": 0.2845524506708935, "grad_norm": 1.7596651315689087, "learning_rate": 8.395058937265572e-06, "loss": 0.8734, "step": 8056 }, { "epoch": 0.28458777247460143, "grad_norm": 1.7751986980438232, "learning_rate": 8.394638986943095e-06, "loss": 0.7932, "step": 8057 }, { "epoch": 0.28462309427830934, "grad_norm": 1.484827995300293, "learning_rate": 8.394218992192222e-06, "loss": 0.8208, "step": 8058 }, { "epoch": 0.28465841608201725, "grad_norm": 1.6996841430664062, "learning_rate": 8.39379895301845e-06, "loss": 0.8668, "step": 8059 }, { "epoch": 0.28469373788572516, "grad_norm": 1.6397403478622437, "learning_rate": 8.393378869427273e-06, "loss": 0.8238, "step": 8060 }, { "epoch": 0.284729059689433, "grad_norm": 1.8785055875778198, "learning_rate": 8.392958741424192e-06, "loss": 0.8247, "step": 8061 }, { "epoch": 0.28476438149314093, "grad_norm": 1.6175447702407837, "learning_rate": 8.392538569014705e-06, "loss": 0.8362, "step": 8062 }, { "epoch": 0.28479970329684884, "grad_norm": 1.5597487688064575, "learning_rate": 8.39211835220431e-06, "loss": 0.7861, "step": 8063 }, { "epoch": 0.28483502510055675, "grad_norm": 2.0184504985809326, "learning_rate": 8.391698090998507e-06, "loss": 0.8267, "step": 8064 }, { "epoch": 0.28487034690426466, "grad_norm": 1.0195350646972656, "learning_rate": 8.3912777854028e-06, "loss": 0.5984, "step": 8065 }, { "epoch": 0.28490566870797257, "grad_norm": 1.7212966680526733, "learning_rate": 8.390857435422686e-06, "loss": 0.8462, "step": 8066 }, { "epoch": 0.2849409905116805, "grad_norm": 1.608801245689392, "learning_rate": 8.390437041063666e-06, "loss": 0.8176, "step": 8067 }, { "epoch": 0.2849763123153884, "grad_norm": 1.666122317314148, "learning_rate": 8.390016602331244e-06, "loss": 0.8281, "step": 8068 }, { "epoch": 0.2850116341190963, "grad_norm": 1.7130132913589478, "learning_rate": 8.389596119230922e-06, "loss": 0.8471, "step": 8069 }, { "epoch": 0.2850469559228042, "grad_norm": 1.5815445184707642, "learning_rate": 8.389175591768204e-06, "loss": 0.827, "step": 8070 }, { "epoch": 0.2850822777265121, "grad_norm": 1.8698726892471313, "learning_rate": 8.388755019948592e-06, "loss": 0.8583, "step": 8071 }, { "epoch": 0.28511759953022003, "grad_norm": 1.653768539428711, "learning_rate": 8.388334403777589e-06, "loss": 0.834, "step": 8072 }, { "epoch": 0.28515292133392794, "grad_norm": 1.5848318338394165, "learning_rate": 8.387913743260705e-06, "loss": 0.8103, "step": 8073 }, { "epoch": 0.2851882431376358, "grad_norm": 1.586253046989441, "learning_rate": 8.387493038403442e-06, "loss": 0.8023, "step": 8074 }, { "epoch": 0.2852235649413437, "grad_norm": 1.7428877353668213, "learning_rate": 8.387072289211307e-06, "loss": 0.8226, "step": 8075 }, { "epoch": 0.2852588867450516, "grad_norm": 1.633431315422058, "learning_rate": 8.386651495689808e-06, "loss": 0.8779, "step": 8076 }, { "epoch": 0.28529420854875953, "grad_norm": 1.838579535484314, "learning_rate": 8.38623065784445e-06, "loss": 0.887, "step": 8077 }, { "epoch": 0.28532953035246744, "grad_norm": 1.8914132118225098, "learning_rate": 8.385809775680742e-06, "loss": 0.8217, "step": 8078 }, { "epoch": 0.28536485215617535, "grad_norm": 1.7949292659759521, "learning_rate": 8.38538884920419e-06, "loss": 0.8474, "step": 8079 }, { "epoch": 0.28540017395988326, "grad_norm": 1.6065475940704346, "learning_rate": 8.384967878420307e-06, "loss": 0.841, "step": 8080 }, { "epoch": 0.2854354957635912, "grad_norm": 1.653812289237976, "learning_rate": 8.3845468633346e-06, "loss": 0.8172, "step": 8081 }, { "epoch": 0.2854708175672991, "grad_norm": 2.0227444171905518, "learning_rate": 8.38412580395258e-06, "loss": 0.8038, "step": 8082 }, { "epoch": 0.285506139371007, "grad_norm": 1.7193424701690674, "learning_rate": 8.383704700279758e-06, "loss": 0.8183, "step": 8083 }, { "epoch": 0.2855414611747149, "grad_norm": 1.6409317255020142, "learning_rate": 8.383283552321643e-06, "loss": 0.8488, "step": 8084 }, { "epoch": 0.2855767829784228, "grad_norm": 1.4753401279449463, "learning_rate": 8.382862360083752e-06, "loss": 0.8044, "step": 8085 }, { "epoch": 0.2856121047821307, "grad_norm": 1.5724766254425049, "learning_rate": 8.382441123571594e-06, "loss": 0.8127, "step": 8086 }, { "epoch": 0.2856474265858386, "grad_norm": 1.8943208456039429, "learning_rate": 8.38201984279068e-06, "loss": 0.8186, "step": 8087 }, { "epoch": 0.2856827483895465, "grad_norm": 3.1824679374694824, "learning_rate": 8.381598517746528e-06, "loss": 0.8438, "step": 8088 }, { "epoch": 0.2857180701932544, "grad_norm": 1.5045725107192993, "learning_rate": 8.381177148444649e-06, "loss": 0.8039, "step": 8089 }, { "epoch": 0.2857533919969623, "grad_norm": 1.655258297920227, "learning_rate": 8.38075573489056e-06, "loss": 0.8539, "step": 8090 }, { "epoch": 0.2857887138006702, "grad_norm": 1.0224486589431763, "learning_rate": 8.380334277089774e-06, "loss": 0.631, "step": 8091 }, { "epoch": 0.28582403560437813, "grad_norm": 1.9226564168930054, "learning_rate": 8.37991277504781e-06, "loss": 0.8314, "step": 8092 }, { "epoch": 0.28585935740808605, "grad_norm": 1.7736629247665405, "learning_rate": 8.379491228770182e-06, "loss": 0.8445, "step": 8093 }, { "epoch": 0.28589467921179396, "grad_norm": 1.8679121732711792, "learning_rate": 8.37906963826241e-06, "loss": 0.8423, "step": 8094 }, { "epoch": 0.28593000101550187, "grad_norm": 1.744242787361145, "learning_rate": 8.378648003530009e-06, "loss": 0.8525, "step": 8095 }, { "epoch": 0.2859653228192098, "grad_norm": 1.7863435745239258, "learning_rate": 8.378226324578497e-06, "loss": 0.8599, "step": 8096 }, { "epoch": 0.2860006446229177, "grad_norm": 1.581215262413025, "learning_rate": 8.377804601413393e-06, "loss": 0.8151, "step": 8097 }, { "epoch": 0.2860359664266256, "grad_norm": 1.6548882722854614, "learning_rate": 8.37738283404022e-06, "loss": 0.8595, "step": 8098 }, { "epoch": 0.2860712882303335, "grad_norm": 1.616376519203186, "learning_rate": 8.376961022464494e-06, "loss": 0.8255, "step": 8099 }, { "epoch": 0.28610661003404136, "grad_norm": 1.5669163465499878, "learning_rate": 8.376539166691737e-06, "loss": 0.8171, "step": 8100 }, { "epoch": 0.2861419318377493, "grad_norm": 1.7881346940994263, "learning_rate": 8.376117266727469e-06, "loss": 0.8287, "step": 8101 }, { "epoch": 0.2861772536414572, "grad_norm": 1.513830542564392, "learning_rate": 8.375695322577216e-06, "loss": 0.8241, "step": 8102 }, { "epoch": 0.2862125754451651, "grad_norm": 2.5224058628082275, "learning_rate": 8.375273334246493e-06, "loss": 0.8545, "step": 8103 }, { "epoch": 0.286247897248873, "grad_norm": 1.9570759534835815, "learning_rate": 8.37485130174083e-06, "loss": 0.8312, "step": 8104 }, { "epoch": 0.2862832190525809, "grad_norm": 1.8111566305160522, "learning_rate": 8.374429225065746e-06, "loss": 0.8438, "step": 8105 }, { "epoch": 0.28631854085628883, "grad_norm": 1.5310653448104858, "learning_rate": 8.374007104226768e-06, "loss": 0.7999, "step": 8106 }, { "epoch": 0.28635386265999674, "grad_norm": 1.6544456481933594, "learning_rate": 8.37358493922942e-06, "loss": 0.8341, "step": 8107 }, { "epoch": 0.28638918446370465, "grad_norm": 1.7847851514816284, "learning_rate": 8.373162730079223e-06, "loss": 0.8202, "step": 8108 }, { "epoch": 0.28642450626741256, "grad_norm": 1.6251360177993774, "learning_rate": 8.372740476781708e-06, "loss": 0.8131, "step": 8109 }, { "epoch": 0.28645982807112047, "grad_norm": 1.7696738243103027, "learning_rate": 8.3723181793424e-06, "loss": 0.8417, "step": 8110 }, { "epoch": 0.2864951498748284, "grad_norm": 1.7775861024856567, "learning_rate": 8.371895837766825e-06, "loss": 0.8892, "step": 8111 }, { "epoch": 0.2865304716785363, "grad_norm": 1.9816995859146118, "learning_rate": 8.371473452060512e-06, "loss": 0.828, "step": 8112 }, { "epoch": 0.28656579348224415, "grad_norm": 1.5392420291900635, "learning_rate": 8.37105102222899e-06, "loss": 0.8139, "step": 8113 }, { "epoch": 0.28660111528595206, "grad_norm": 1.6880351305007935, "learning_rate": 8.370628548277783e-06, "loss": 0.8309, "step": 8114 }, { "epoch": 0.28663643708965997, "grad_norm": 1.7502284049987793, "learning_rate": 8.370206030212425e-06, "loss": 0.8523, "step": 8115 }, { "epoch": 0.2866717588933679, "grad_norm": 1.5658683776855469, "learning_rate": 8.369783468038442e-06, "loss": 0.793, "step": 8116 }, { "epoch": 0.2867070806970758, "grad_norm": 1.6556222438812256, "learning_rate": 8.369360861761368e-06, "loss": 0.8044, "step": 8117 }, { "epoch": 0.2867424025007837, "grad_norm": 1.5279940366744995, "learning_rate": 8.368938211386731e-06, "loss": 0.8093, "step": 8118 }, { "epoch": 0.2867777243044916, "grad_norm": 1.425554633140564, "learning_rate": 8.368515516920066e-06, "loss": 0.8049, "step": 8119 }, { "epoch": 0.2868130461081995, "grad_norm": 1.9492154121398926, "learning_rate": 8.3680927783669e-06, "loss": 0.8387, "step": 8120 }, { "epoch": 0.28684836791190743, "grad_norm": 1.7146323919296265, "learning_rate": 8.367669995732772e-06, "loss": 0.81, "step": 8121 }, { "epoch": 0.28688368971561534, "grad_norm": 1.7973952293395996, "learning_rate": 8.36724716902321e-06, "loss": 0.8461, "step": 8122 }, { "epoch": 0.28691901151932325, "grad_norm": 1.879860281944275, "learning_rate": 8.366824298243751e-06, "loss": 0.8516, "step": 8123 }, { "epoch": 0.28695433332303116, "grad_norm": 1.8380478620529175, "learning_rate": 8.366401383399929e-06, "loss": 0.795, "step": 8124 }, { "epoch": 0.2869896551267391, "grad_norm": 1.6176974773406982, "learning_rate": 8.36597842449728e-06, "loss": 0.8155, "step": 8125 }, { "epoch": 0.28702497693044693, "grad_norm": 1.6235777139663696, "learning_rate": 8.365555421541335e-06, "loss": 0.8261, "step": 8126 }, { "epoch": 0.28706029873415484, "grad_norm": 1.6174647808074951, "learning_rate": 8.365132374537635e-06, "loss": 0.8292, "step": 8127 }, { "epoch": 0.28709562053786275, "grad_norm": 1.8037030696868896, "learning_rate": 8.364709283491715e-06, "loss": 0.8366, "step": 8128 }, { "epoch": 0.28713094234157066, "grad_norm": 1.6729199886322021, "learning_rate": 8.36428614840911e-06, "loss": 0.8061, "step": 8129 }, { "epoch": 0.28716626414527857, "grad_norm": 2.7972655296325684, "learning_rate": 8.363862969295362e-06, "loss": 0.84, "step": 8130 }, { "epoch": 0.2872015859489865, "grad_norm": 1.9040971994400024, "learning_rate": 8.363439746156008e-06, "loss": 0.8588, "step": 8131 }, { "epoch": 0.2872369077526944, "grad_norm": 1.857462763786316, "learning_rate": 8.363016478996588e-06, "loss": 0.8459, "step": 8132 }, { "epoch": 0.2872722295564023, "grad_norm": 1.5571485757827759, "learning_rate": 8.362593167822639e-06, "loss": 0.8364, "step": 8133 }, { "epoch": 0.2873075513601102, "grad_norm": 1.5481129884719849, "learning_rate": 8.362169812639703e-06, "loss": 0.8385, "step": 8134 }, { "epoch": 0.2873428731638181, "grad_norm": 1.6492300033569336, "learning_rate": 8.36174641345332e-06, "loss": 0.8529, "step": 8135 }, { "epoch": 0.28737819496752603, "grad_norm": 1.8344659805297852, "learning_rate": 8.361322970269032e-06, "loss": 0.842, "step": 8136 }, { "epoch": 0.28741351677123395, "grad_norm": 1.5554476976394653, "learning_rate": 8.360899483092382e-06, "loss": 0.788, "step": 8137 }, { "epoch": 0.28744883857494186, "grad_norm": 1.8194714784622192, "learning_rate": 8.36047595192891e-06, "loss": 0.8345, "step": 8138 }, { "epoch": 0.2874841603786497, "grad_norm": 1.6558464765548706, "learning_rate": 8.360052376784162e-06, "loss": 0.819, "step": 8139 }, { "epoch": 0.2875194821823576, "grad_norm": 1.7945053577423096, "learning_rate": 8.359628757663679e-06, "loss": 0.8474, "step": 8140 }, { "epoch": 0.28755480398606553, "grad_norm": 1.6262942552566528, "learning_rate": 8.359205094573006e-06, "loss": 0.8301, "step": 8141 }, { "epoch": 0.28759012578977344, "grad_norm": 1.8449046611785889, "learning_rate": 8.358781387517689e-06, "loss": 0.8637, "step": 8142 }, { "epoch": 0.28762544759348135, "grad_norm": 1.7184374332427979, "learning_rate": 8.358357636503271e-06, "loss": 0.8189, "step": 8143 }, { "epoch": 0.28766076939718926, "grad_norm": 1.7294697761535645, "learning_rate": 8.357933841535302e-06, "loss": 0.8084, "step": 8144 }, { "epoch": 0.2876960912008972, "grad_norm": 1.583402156829834, "learning_rate": 8.357510002619324e-06, "loss": 0.8101, "step": 8145 }, { "epoch": 0.2877314130046051, "grad_norm": 1.5174412727355957, "learning_rate": 8.357086119760889e-06, "loss": 0.8262, "step": 8146 }, { "epoch": 0.287766734808313, "grad_norm": 1.6665116548538208, "learning_rate": 8.356662192965539e-06, "loss": 0.8189, "step": 8147 }, { "epoch": 0.2878020566120209, "grad_norm": 1.6214118003845215, "learning_rate": 8.356238222238827e-06, "loss": 0.8454, "step": 8148 }, { "epoch": 0.2878373784157288, "grad_norm": 1.8196971416473389, "learning_rate": 8.355814207586299e-06, "loss": 0.8274, "step": 8149 }, { "epoch": 0.28787270021943673, "grad_norm": 1.8245083093643188, "learning_rate": 8.355390149013506e-06, "loss": 0.8277, "step": 8150 }, { "epoch": 0.28790802202314464, "grad_norm": 1.6402252912521362, "learning_rate": 8.354966046525998e-06, "loss": 0.8391, "step": 8151 }, { "epoch": 0.2879433438268525, "grad_norm": 1.6523431539535522, "learning_rate": 8.354541900129323e-06, "loss": 0.8152, "step": 8152 }, { "epoch": 0.2879786656305604, "grad_norm": 0.9429556727409363, "learning_rate": 8.354117709829036e-06, "loss": 0.5917, "step": 8153 }, { "epoch": 0.2880139874342683, "grad_norm": 1.7433459758758545, "learning_rate": 8.353693475630686e-06, "loss": 0.8467, "step": 8154 }, { "epoch": 0.2880493092379762, "grad_norm": 1.6226048469543457, "learning_rate": 8.353269197539827e-06, "loss": 0.8004, "step": 8155 }, { "epoch": 0.28808463104168414, "grad_norm": 1.746686577796936, "learning_rate": 8.35284487556201e-06, "loss": 0.8471, "step": 8156 }, { "epoch": 0.28811995284539205, "grad_norm": 1.6681402921676636, "learning_rate": 8.35242050970279e-06, "loss": 0.7839, "step": 8157 }, { "epoch": 0.28815527464909996, "grad_norm": 1.8762314319610596, "learning_rate": 8.351996099967721e-06, "loss": 0.8278, "step": 8158 }, { "epoch": 0.28819059645280787, "grad_norm": 1.6413533687591553, "learning_rate": 8.351571646362357e-06, "loss": 0.8159, "step": 8159 }, { "epoch": 0.2882259182565158, "grad_norm": 1.5956041812896729, "learning_rate": 8.351147148892253e-06, "loss": 0.839, "step": 8160 }, { "epoch": 0.2882612400602237, "grad_norm": 1.659594178199768, "learning_rate": 8.350722607562967e-06, "loss": 0.8215, "step": 8161 }, { "epoch": 0.2882965618639316, "grad_norm": 1.6098113059997559, "learning_rate": 8.350298022380052e-06, "loss": 0.8152, "step": 8162 }, { "epoch": 0.2883318836676395, "grad_norm": 1.6601115465164185, "learning_rate": 8.349873393349065e-06, "loss": 0.8413, "step": 8163 }, { "epoch": 0.2883672054713474, "grad_norm": 1.645374059677124, "learning_rate": 8.349448720475566e-06, "loss": 0.8322, "step": 8164 }, { "epoch": 0.2884025272750553, "grad_norm": 1.7023515701293945, "learning_rate": 8.349024003765112e-06, "loss": 0.8111, "step": 8165 }, { "epoch": 0.2884378490787632, "grad_norm": 1.57707941532135, "learning_rate": 8.348599243223262e-06, "loss": 0.8418, "step": 8166 }, { "epoch": 0.2884731708824711, "grad_norm": 1.5489572286605835, "learning_rate": 8.348174438855573e-06, "loss": 0.8266, "step": 8167 }, { "epoch": 0.288508492686179, "grad_norm": 1.6406582593917847, "learning_rate": 8.347749590667606e-06, "loss": 0.8636, "step": 8168 }, { "epoch": 0.2885438144898869, "grad_norm": 1.6851590871810913, "learning_rate": 8.347324698664923e-06, "loss": 0.8424, "step": 8169 }, { "epoch": 0.28857913629359483, "grad_norm": 1.5983070135116577, "learning_rate": 8.346899762853082e-06, "loss": 0.8047, "step": 8170 }, { "epoch": 0.28861445809730274, "grad_norm": 1.7896100282669067, "learning_rate": 8.346474783237649e-06, "loss": 0.8409, "step": 8171 }, { "epoch": 0.28864977990101065, "grad_norm": 1.5078794956207275, "learning_rate": 8.34604975982418e-06, "loss": 0.8069, "step": 8172 }, { "epoch": 0.28868510170471856, "grad_norm": 2.436640501022339, "learning_rate": 8.345624692618242e-06, "loss": 0.8601, "step": 8173 }, { "epoch": 0.28872042350842647, "grad_norm": 1.708959698677063, "learning_rate": 8.345199581625397e-06, "loss": 0.815, "step": 8174 }, { "epoch": 0.2887557453121344, "grad_norm": 1.5370255708694458, "learning_rate": 8.344774426851207e-06, "loss": 0.8054, "step": 8175 }, { "epoch": 0.2887910671158423, "grad_norm": 1.8469414710998535, "learning_rate": 8.34434922830124e-06, "loss": 0.8056, "step": 8176 }, { "epoch": 0.2888263889195502, "grad_norm": 1.7896130084991455, "learning_rate": 8.34392398598106e-06, "loss": 0.8369, "step": 8177 }, { "epoch": 0.28886171072325806, "grad_norm": 1.6107605695724487, "learning_rate": 8.34349869989623e-06, "loss": 0.8395, "step": 8178 }, { "epoch": 0.28889703252696597, "grad_norm": 1.6709812879562378, "learning_rate": 8.343073370052317e-06, "loss": 0.8818, "step": 8179 }, { "epoch": 0.2889323543306739, "grad_norm": 1.5875455141067505, "learning_rate": 8.34264799645489e-06, "loss": 0.8357, "step": 8180 }, { "epoch": 0.2889676761343818, "grad_norm": 1.9011949300765991, "learning_rate": 8.34222257910951e-06, "loss": 0.8921, "step": 8181 }, { "epoch": 0.2890029979380897, "grad_norm": 1.6823582649230957, "learning_rate": 8.341797118021755e-06, "loss": 0.8164, "step": 8182 }, { "epoch": 0.2890383197417976, "grad_norm": 1.7339627742767334, "learning_rate": 8.341371613197185e-06, "loss": 0.8382, "step": 8183 }, { "epoch": 0.2890736415455055, "grad_norm": 1.8170015811920166, "learning_rate": 8.340946064641371e-06, "loss": 0.7945, "step": 8184 }, { "epoch": 0.28910896334921343, "grad_norm": 1.928098201751709, "learning_rate": 8.340520472359882e-06, "loss": 0.8624, "step": 8185 }, { "epoch": 0.28914428515292134, "grad_norm": 1.8403704166412354, "learning_rate": 8.34009483635829e-06, "loss": 0.8021, "step": 8186 }, { "epoch": 0.28917960695662925, "grad_norm": 1.6677638292312622, "learning_rate": 8.339669156642166e-06, "loss": 0.7984, "step": 8187 }, { "epoch": 0.28921492876033716, "grad_norm": 1.5969988107681274, "learning_rate": 8.339243433217078e-06, "loss": 0.8214, "step": 8188 }, { "epoch": 0.2892502505640451, "grad_norm": 1.6668540239334106, "learning_rate": 8.3388176660886e-06, "loss": 0.8517, "step": 8189 }, { "epoch": 0.289285572367753, "grad_norm": 1.6779084205627441, "learning_rate": 8.338391855262304e-06, "loss": 0.8415, "step": 8190 }, { "epoch": 0.28932089417146084, "grad_norm": 1.7112669944763184, "learning_rate": 8.337966000743765e-06, "loss": 0.8342, "step": 8191 }, { "epoch": 0.28935621597516875, "grad_norm": 1.5544285774230957, "learning_rate": 8.337540102538553e-06, "loss": 0.837, "step": 8192 }, { "epoch": 0.28939153777887666, "grad_norm": 1.6939536333084106, "learning_rate": 8.337114160652242e-06, "loss": 0.8429, "step": 8193 }, { "epoch": 0.2894268595825846, "grad_norm": 1.7508307695388794, "learning_rate": 8.33668817509041e-06, "loss": 0.8399, "step": 8194 }, { "epoch": 0.2894621813862925, "grad_norm": 1.5976645946502686, "learning_rate": 8.33626214585863e-06, "loss": 0.8479, "step": 8195 }, { "epoch": 0.2894975031900004, "grad_norm": 1.6395357847213745, "learning_rate": 8.335836072962477e-06, "loss": 0.8284, "step": 8196 }, { "epoch": 0.2895328249937083, "grad_norm": 1.6862703561782837, "learning_rate": 8.33540995640753e-06, "loss": 0.8547, "step": 8197 }, { "epoch": 0.2895681467974162, "grad_norm": 1.835514783859253, "learning_rate": 8.334983796199365e-06, "loss": 0.8248, "step": 8198 }, { "epoch": 0.2896034686011241, "grad_norm": 1.6722997426986694, "learning_rate": 8.334557592343558e-06, "loss": 0.8491, "step": 8199 }, { "epoch": 0.28963879040483204, "grad_norm": 1.9593923091888428, "learning_rate": 8.334131344845687e-06, "loss": 0.8553, "step": 8200 }, { "epoch": 0.28967411220853995, "grad_norm": 1.6011868715286255, "learning_rate": 8.333705053711335e-06, "loss": 0.8636, "step": 8201 }, { "epoch": 0.28970943401224786, "grad_norm": 1.6314001083374023, "learning_rate": 8.333278718946074e-06, "loss": 0.8394, "step": 8202 }, { "epoch": 0.28974475581595577, "grad_norm": 1.7672795057296753, "learning_rate": 8.33285234055549e-06, "loss": 0.8299, "step": 8203 }, { "epoch": 0.2897800776196636, "grad_norm": 1.6872737407684326, "learning_rate": 8.332425918545159e-06, "loss": 0.7994, "step": 8204 }, { "epoch": 0.28981539942337153, "grad_norm": 1.8103426694869995, "learning_rate": 8.331999452920667e-06, "loss": 0.868, "step": 8205 }, { "epoch": 0.28985072122707944, "grad_norm": 1.8263765573501587, "learning_rate": 8.33157294368759e-06, "loss": 0.8383, "step": 8206 }, { "epoch": 0.28988604303078735, "grad_norm": 1.8193548917770386, "learning_rate": 8.331146390851515e-06, "loss": 0.8195, "step": 8207 }, { "epoch": 0.28992136483449527, "grad_norm": 1.8853871822357178, "learning_rate": 8.33071979441802e-06, "loss": 0.7837, "step": 8208 }, { "epoch": 0.2899566866382032, "grad_norm": 1.6742312908172607, "learning_rate": 8.330293154392691e-06, "loss": 0.8078, "step": 8209 }, { "epoch": 0.2899920084419111, "grad_norm": 1.792229175567627, "learning_rate": 8.329866470781112e-06, "loss": 0.8457, "step": 8210 }, { "epoch": 0.290027330245619, "grad_norm": 1.7940311431884766, "learning_rate": 8.329439743588866e-06, "loss": 0.8514, "step": 8211 }, { "epoch": 0.2900626520493269, "grad_norm": 1.7355082035064697, "learning_rate": 8.329012972821538e-06, "loss": 0.8055, "step": 8212 }, { "epoch": 0.2900979738530348, "grad_norm": 2.055917501449585, "learning_rate": 8.328586158484715e-06, "loss": 0.8264, "step": 8213 }, { "epoch": 0.29013329565674273, "grad_norm": 1.849122405052185, "learning_rate": 8.328159300583982e-06, "loss": 0.8654, "step": 8214 }, { "epoch": 0.29016861746045064, "grad_norm": 1.6344249248504639, "learning_rate": 8.327732399124926e-06, "loss": 0.8148, "step": 8215 }, { "epoch": 0.29020393926415855, "grad_norm": 1.677567481994629, "learning_rate": 8.327305454113133e-06, "loss": 0.8517, "step": 8216 }, { "epoch": 0.2902392610678664, "grad_norm": 1.7888576984405518, "learning_rate": 8.326878465554192e-06, "loss": 0.8334, "step": 8217 }, { "epoch": 0.2902745828715743, "grad_norm": 1.7448550462722778, "learning_rate": 8.326451433453691e-06, "loss": 0.8321, "step": 8218 }, { "epoch": 0.2903099046752822, "grad_norm": 1.660536527633667, "learning_rate": 8.326024357817217e-06, "loss": 0.8594, "step": 8219 }, { "epoch": 0.29034522647899014, "grad_norm": 1.6658482551574707, "learning_rate": 8.325597238650364e-06, "loss": 0.785, "step": 8220 }, { "epoch": 0.29038054828269805, "grad_norm": 1.6791054010391235, "learning_rate": 8.325170075958719e-06, "loss": 0.8018, "step": 8221 }, { "epoch": 0.29041587008640596, "grad_norm": 1.9756627082824707, "learning_rate": 8.324742869747872e-06, "loss": 0.8561, "step": 8222 }, { "epoch": 0.29045119189011387, "grad_norm": 1.6738176345825195, "learning_rate": 8.324315620023418e-06, "loss": 0.8158, "step": 8223 }, { "epoch": 0.2904865136938218, "grad_norm": 1.4738692045211792, "learning_rate": 8.323888326790943e-06, "loss": 0.7835, "step": 8224 }, { "epoch": 0.2905218354975297, "grad_norm": 1.571989893913269, "learning_rate": 8.323460990056044e-06, "loss": 0.8201, "step": 8225 }, { "epoch": 0.2905571573012376, "grad_norm": 1.6468273401260376, "learning_rate": 8.323033609824313e-06, "loss": 0.8363, "step": 8226 }, { "epoch": 0.2905924791049455, "grad_norm": 1.2569414377212524, "learning_rate": 8.322606186101342e-06, "loss": 0.6367, "step": 8227 }, { "epoch": 0.2906278009086534, "grad_norm": 1.6496849060058594, "learning_rate": 8.322178718892725e-06, "loss": 0.8232, "step": 8228 }, { "epoch": 0.29066312271236133, "grad_norm": 1.6886504888534546, "learning_rate": 8.32175120820406e-06, "loss": 0.8154, "step": 8229 }, { "epoch": 0.2906984445160692, "grad_norm": 1.765263319015503, "learning_rate": 8.321323654040936e-06, "loss": 0.8115, "step": 8230 }, { "epoch": 0.2907337663197771, "grad_norm": 1.7161355018615723, "learning_rate": 8.320896056408953e-06, "loss": 0.8639, "step": 8231 }, { "epoch": 0.290769088123485, "grad_norm": 1.5470091104507446, "learning_rate": 8.320468415313707e-06, "loss": 0.8226, "step": 8232 }, { "epoch": 0.2908044099271929, "grad_norm": 1.7067458629608154, "learning_rate": 8.320040730760797e-06, "loss": 0.8077, "step": 8233 }, { "epoch": 0.29083973173090083, "grad_norm": 1.7096972465515137, "learning_rate": 8.319613002755815e-06, "loss": 0.8687, "step": 8234 }, { "epoch": 0.29087505353460874, "grad_norm": 1.8443852663040161, "learning_rate": 8.319185231304365e-06, "loss": 0.8581, "step": 8235 }, { "epoch": 0.29091037533831665, "grad_norm": 1.8259104490280151, "learning_rate": 8.31875741641204e-06, "loss": 0.8568, "step": 8236 }, { "epoch": 0.29094569714202456, "grad_norm": 1.5982260704040527, "learning_rate": 8.318329558084443e-06, "loss": 0.8546, "step": 8237 }, { "epoch": 0.2909810189457325, "grad_norm": 2.0893185138702393, "learning_rate": 8.317901656327172e-06, "loss": 0.8208, "step": 8238 }, { "epoch": 0.2910163407494404, "grad_norm": 1.7313096523284912, "learning_rate": 8.317473711145827e-06, "loss": 0.8491, "step": 8239 }, { "epoch": 0.2910516625531483, "grad_norm": 1.7976444959640503, "learning_rate": 8.31704572254601e-06, "loss": 0.8205, "step": 8240 }, { "epoch": 0.2910869843568562, "grad_norm": 2.034388542175293, "learning_rate": 8.316617690533325e-06, "loss": 0.8248, "step": 8241 }, { "epoch": 0.2911223061605641, "grad_norm": 1.9941649436950684, "learning_rate": 8.316189615113368e-06, "loss": 0.8455, "step": 8242 }, { "epoch": 0.29115762796427197, "grad_norm": 2.0276927947998047, "learning_rate": 8.315761496291746e-06, "loss": 0.8456, "step": 8243 }, { "epoch": 0.2911929497679799, "grad_norm": 1.6693187952041626, "learning_rate": 8.31533333407406e-06, "loss": 0.8335, "step": 8244 }, { "epoch": 0.2912282715716878, "grad_norm": 1.1618247032165527, "learning_rate": 8.314905128465915e-06, "loss": 0.58, "step": 8245 }, { "epoch": 0.2912635933753957, "grad_norm": 1.6475799083709717, "learning_rate": 8.314476879472914e-06, "loss": 0.8365, "step": 8246 }, { "epoch": 0.2912989151791036, "grad_norm": 1.8446998596191406, "learning_rate": 8.314048587100665e-06, "loss": 0.8098, "step": 8247 }, { "epoch": 0.2913342369828115, "grad_norm": 1.535187840461731, "learning_rate": 8.31362025135477e-06, "loss": 0.8506, "step": 8248 }, { "epoch": 0.29136955878651943, "grad_norm": 1.629112720489502, "learning_rate": 8.313191872240835e-06, "loss": 0.8704, "step": 8249 }, { "epoch": 0.29140488059022734, "grad_norm": 1.591532588005066, "learning_rate": 8.312763449764468e-06, "loss": 0.8355, "step": 8250 }, { "epoch": 0.29144020239393525, "grad_norm": 1.542574167251587, "learning_rate": 8.312334983931276e-06, "loss": 0.7917, "step": 8251 }, { "epoch": 0.29147552419764317, "grad_norm": 1.6683366298675537, "learning_rate": 8.311906474746867e-06, "loss": 0.8367, "step": 8252 }, { "epoch": 0.2915108460013511, "grad_norm": 1.917863368988037, "learning_rate": 8.311477922216847e-06, "loss": 0.8359, "step": 8253 }, { "epoch": 0.291546167805059, "grad_norm": 1.5535544157028198, "learning_rate": 8.311049326346827e-06, "loss": 0.8037, "step": 8254 }, { "epoch": 0.2915814896087669, "grad_norm": 1.7496837377548218, "learning_rate": 8.310620687142417e-06, "loss": 0.8499, "step": 8255 }, { "epoch": 0.29161681141247475, "grad_norm": 1.2300114631652832, "learning_rate": 8.310192004609225e-06, "loss": 0.582, "step": 8256 }, { "epoch": 0.29165213321618266, "grad_norm": 1.800540566444397, "learning_rate": 8.309763278752863e-06, "loss": 0.8151, "step": 8257 }, { "epoch": 0.2916874550198906, "grad_norm": 1.9652364253997803, "learning_rate": 8.30933450957894e-06, "loss": 0.8511, "step": 8258 }, { "epoch": 0.2917227768235985, "grad_norm": 1.5823043584823608, "learning_rate": 8.308905697093069e-06, "loss": 0.8316, "step": 8259 }, { "epoch": 0.2917580986273064, "grad_norm": 1.6359171867370605, "learning_rate": 8.308476841300863e-06, "loss": 0.8006, "step": 8260 }, { "epoch": 0.2917934204310143, "grad_norm": 1.979715347290039, "learning_rate": 8.308047942207933e-06, "loss": 0.8288, "step": 8261 }, { "epoch": 0.2918287422347222, "grad_norm": 1.583046317100525, "learning_rate": 8.307618999819896e-06, "loss": 0.8014, "step": 8262 }, { "epoch": 0.2918640640384301, "grad_norm": 1.6819809675216675, "learning_rate": 8.307190014142362e-06, "loss": 0.8273, "step": 8263 }, { "epoch": 0.29189938584213804, "grad_norm": 1.6573255062103271, "learning_rate": 8.306760985180947e-06, "loss": 0.8313, "step": 8264 }, { "epoch": 0.29193470764584595, "grad_norm": 1.9130724668502808, "learning_rate": 8.306331912941264e-06, "loss": 0.8584, "step": 8265 }, { "epoch": 0.29197002944955386, "grad_norm": 1.7676972150802612, "learning_rate": 8.305902797428931e-06, "loss": 0.8137, "step": 8266 }, { "epoch": 0.29200535125326177, "grad_norm": 1.840524673461914, "learning_rate": 8.305473638649564e-06, "loss": 0.8466, "step": 8267 }, { "epoch": 0.2920406730569697, "grad_norm": 1.6275689601898193, "learning_rate": 8.30504443660878e-06, "loss": 0.811, "step": 8268 }, { "epoch": 0.29207599486067753, "grad_norm": 1.689475178718567, "learning_rate": 8.304615191312196e-06, "loss": 0.8233, "step": 8269 }, { "epoch": 0.29211131666438545, "grad_norm": 1.6366719007492065, "learning_rate": 8.304185902765428e-06, "loss": 0.8217, "step": 8270 }, { "epoch": 0.29214663846809336, "grad_norm": 1.5537176132202148, "learning_rate": 8.303756570974098e-06, "loss": 0.7926, "step": 8271 }, { "epoch": 0.29218196027180127, "grad_norm": 1.534047245979309, "learning_rate": 8.303327195943821e-06, "loss": 0.8012, "step": 8272 }, { "epoch": 0.2922172820755092, "grad_norm": 1.78251051902771, "learning_rate": 8.30289777768022e-06, "loss": 0.8192, "step": 8273 }, { "epoch": 0.2922526038792171, "grad_norm": 1.773411512374878, "learning_rate": 8.302468316188913e-06, "loss": 0.8751, "step": 8274 }, { "epoch": 0.292287925682925, "grad_norm": 1.9171595573425293, "learning_rate": 8.30203881147552e-06, "loss": 0.8265, "step": 8275 }, { "epoch": 0.2923232474866329, "grad_norm": 1.695303201675415, "learning_rate": 8.301609263545667e-06, "loss": 0.8201, "step": 8276 }, { "epoch": 0.2923585692903408, "grad_norm": 1.6412802934646606, "learning_rate": 8.301179672404971e-06, "loss": 0.836, "step": 8277 }, { "epoch": 0.29239389109404873, "grad_norm": 3.9385528564453125, "learning_rate": 8.300750038059057e-06, "loss": 0.8279, "step": 8278 }, { "epoch": 0.29242921289775664, "grad_norm": 1.9673148393630981, "learning_rate": 8.300320360513547e-06, "loss": 0.8589, "step": 8279 }, { "epoch": 0.29246453470146455, "grad_norm": 1.7709629535675049, "learning_rate": 8.299890639774062e-06, "loss": 0.7961, "step": 8280 }, { "epoch": 0.29249985650517246, "grad_norm": 1.6041910648345947, "learning_rate": 8.299460875846231e-06, "loss": 0.8543, "step": 8281 }, { "epoch": 0.2925351783088803, "grad_norm": 1.881422519683838, "learning_rate": 8.299031068735676e-06, "loss": 0.8326, "step": 8282 }, { "epoch": 0.2925705001125882, "grad_norm": 1.6918120384216309, "learning_rate": 8.298601218448022e-06, "loss": 0.8504, "step": 8283 }, { "epoch": 0.29260582191629614, "grad_norm": 1.6254353523254395, "learning_rate": 8.298171324988895e-06, "loss": 0.7972, "step": 8284 }, { "epoch": 0.29264114372000405, "grad_norm": 1.6756820678710938, "learning_rate": 8.297741388363923e-06, "loss": 0.8163, "step": 8285 }, { "epoch": 0.29267646552371196, "grad_norm": 1.7335617542266846, "learning_rate": 8.29731140857873e-06, "loss": 0.8047, "step": 8286 }, { "epoch": 0.29271178732741987, "grad_norm": 1.8196537494659424, "learning_rate": 8.296881385638946e-06, "loss": 0.8594, "step": 8287 }, { "epoch": 0.2927471091311278, "grad_norm": 1.7662750482559204, "learning_rate": 8.296451319550198e-06, "loss": 0.8299, "step": 8288 }, { "epoch": 0.2927824309348357, "grad_norm": 1.5723744630813599, "learning_rate": 8.296021210318115e-06, "loss": 0.8534, "step": 8289 }, { "epoch": 0.2928177527385436, "grad_norm": 1.5431506633758545, "learning_rate": 8.295591057948325e-06, "loss": 0.8206, "step": 8290 }, { "epoch": 0.2928530745422515, "grad_norm": 1.7775180339813232, "learning_rate": 8.29516086244646e-06, "loss": 0.8201, "step": 8291 }, { "epoch": 0.2928883963459594, "grad_norm": 0.9468204379081726, "learning_rate": 8.294730623818148e-06, "loss": 0.5784, "step": 8292 }, { "epoch": 0.29292371814966733, "grad_norm": 1.7644938230514526, "learning_rate": 8.294300342069022e-06, "loss": 0.7883, "step": 8293 }, { "epoch": 0.29295903995337524, "grad_norm": 1.9144867658615112, "learning_rate": 8.293870017204711e-06, "loss": 0.8637, "step": 8294 }, { "epoch": 0.2929943617570831, "grad_norm": 1.5808364152908325, "learning_rate": 8.29343964923085e-06, "loss": 0.8071, "step": 8295 }, { "epoch": 0.293029683560791, "grad_norm": 1.9463624954223633, "learning_rate": 8.293009238153069e-06, "loss": 0.8472, "step": 8296 }, { "epoch": 0.2930650053644989, "grad_norm": 1.637513279914856, "learning_rate": 8.292578783977003e-06, "loss": 0.8297, "step": 8297 }, { "epoch": 0.29310032716820683, "grad_norm": 1.5246299505233765, "learning_rate": 8.292148286708284e-06, "loss": 0.8062, "step": 8298 }, { "epoch": 0.29313564897191474, "grad_norm": 1.8058630228042603, "learning_rate": 8.291717746352548e-06, "loss": 0.8426, "step": 8299 }, { "epoch": 0.29317097077562265, "grad_norm": 1.5959489345550537, "learning_rate": 8.291287162915426e-06, "loss": 0.831, "step": 8300 }, { "epoch": 0.29320629257933056, "grad_norm": 1.5381287336349487, "learning_rate": 8.29085653640256e-06, "loss": 0.8272, "step": 8301 }, { "epoch": 0.2932416143830385, "grad_norm": 1.62144136428833, "learning_rate": 8.29042586681958e-06, "loss": 0.832, "step": 8302 }, { "epoch": 0.2932769361867464, "grad_norm": 1.8875447511672974, "learning_rate": 8.289995154172126e-06, "loss": 0.8279, "step": 8303 }, { "epoch": 0.2933122579904543, "grad_norm": 1.7483974695205688, "learning_rate": 8.289564398465834e-06, "loss": 0.8786, "step": 8304 }, { "epoch": 0.2933475797941622, "grad_norm": 1.7309603691101074, "learning_rate": 8.28913359970634e-06, "loss": 0.8401, "step": 8305 }, { "epoch": 0.2933829015978701, "grad_norm": 1.6143786907196045, "learning_rate": 8.288702757899285e-06, "loss": 0.8046, "step": 8306 }, { "epoch": 0.293418223401578, "grad_norm": 1.6413793563842773, "learning_rate": 8.288271873050307e-06, "loss": 0.8027, "step": 8307 }, { "epoch": 0.2934535452052859, "grad_norm": 1.994921326637268, "learning_rate": 8.287840945165045e-06, "loss": 0.8671, "step": 8308 }, { "epoch": 0.2934888670089938, "grad_norm": 1.523634433746338, "learning_rate": 8.287409974249137e-06, "loss": 0.8122, "step": 8309 }, { "epoch": 0.2935241888127017, "grad_norm": 1.740474820137024, "learning_rate": 8.286978960308225e-06, "loss": 0.8324, "step": 8310 }, { "epoch": 0.2935595106164096, "grad_norm": 1.6603715419769287, "learning_rate": 8.286547903347951e-06, "loss": 0.834, "step": 8311 }, { "epoch": 0.2935948324201175, "grad_norm": 1.6687190532684326, "learning_rate": 8.286116803373956e-06, "loss": 0.8089, "step": 8312 }, { "epoch": 0.29363015422382543, "grad_norm": 1.6551408767700195, "learning_rate": 8.285685660391882e-06, "loss": 0.8458, "step": 8313 }, { "epoch": 0.29366547602753335, "grad_norm": 1.649661898612976, "learning_rate": 8.285254474407371e-06, "loss": 0.8742, "step": 8314 }, { "epoch": 0.29370079783124126, "grad_norm": 1.6280847787857056, "learning_rate": 8.284823245426069e-06, "loss": 0.8166, "step": 8315 }, { "epoch": 0.29373611963494917, "grad_norm": 1.6703407764434814, "learning_rate": 8.284391973453618e-06, "loss": 0.8394, "step": 8316 }, { "epoch": 0.2937714414386571, "grad_norm": 1.6368637084960938, "learning_rate": 8.28396065849566e-06, "loss": 0.8202, "step": 8317 }, { "epoch": 0.293806763242365, "grad_norm": 1.678900122642517, "learning_rate": 8.283529300557843e-06, "loss": 0.8353, "step": 8318 }, { "epoch": 0.2938420850460729, "grad_norm": 1.5160976648330688, "learning_rate": 8.283097899645811e-06, "loss": 0.8312, "step": 8319 }, { "epoch": 0.2938774068497808, "grad_norm": 1.8108057975769043, "learning_rate": 8.282666455765213e-06, "loss": 0.8144, "step": 8320 }, { "epoch": 0.29391272865348866, "grad_norm": 1.795892357826233, "learning_rate": 8.282234968921691e-06, "loss": 0.8225, "step": 8321 }, { "epoch": 0.2939480504571966, "grad_norm": 2.336484909057617, "learning_rate": 8.281803439120897e-06, "loss": 0.8362, "step": 8322 }, { "epoch": 0.2939833722609045, "grad_norm": 1.7557371854782104, "learning_rate": 8.281371866368476e-06, "loss": 0.8224, "step": 8323 }, { "epoch": 0.2940186940646124, "grad_norm": 1.6519391536712646, "learning_rate": 8.280940250670075e-06, "loss": 0.8021, "step": 8324 }, { "epoch": 0.2940540158683203, "grad_norm": 1.5611711740493774, "learning_rate": 8.280508592031347e-06, "loss": 0.8452, "step": 8325 }, { "epoch": 0.2940893376720282, "grad_norm": 1.8577895164489746, "learning_rate": 8.280076890457938e-06, "loss": 0.8524, "step": 8326 }, { "epoch": 0.2941246594757361, "grad_norm": 1.7173992395401, "learning_rate": 8.279645145955499e-06, "loss": 0.8859, "step": 8327 }, { "epoch": 0.29415998127944404, "grad_norm": 1.5761865377426147, "learning_rate": 8.279213358529681e-06, "loss": 0.8099, "step": 8328 }, { "epoch": 0.29419530308315195, "grad_norm": 1.6326435804367065, "learning_rate": 8.278781528186137e-06, "loss": 0.8152, "step": 8329 }, { "epoch": 0.29423062488685986, "grad_norm": 1.6756291389465332, "learning_rate": 8.278349654930514e-06, "loss": 0.8013, "step": 8330 }, { "epoch": 0.29426594669056777, "grad_norm": 1.6714816093444824, "learning_rate": 8.277917738768468e-06, "loss": 0.8362, "step": 8331 }, { "epoch": 0.2943012684942757, "grad_norm": 1.675034999847412, "learning_rate": 8.277485779705651e-06, "loss": 0.8197, "step": 8332 }, { "epoch": 0.2943365902979836, "grad_norm": 1.5859646797180176, "learning_rate": 8.277053777747716e-06, "loss": 0.7939, "step": 8333 }, { "epoch": 0.29437191210169145, "grad_norm": 1.6380306482315063, "learning_rate": 8.276621732900317e-06, "loss": 0.8384, "step": 8334 }, { "epoch": 0.29440723390539936, "grad_norm": 1.701780080795288, "learning_rate": 8.27618964516911e-06, "loss": 0.8103, "step": 8335 }, { "epoch": 0.29444255570910727, "grad_norm": 1.7597277164459229, "learning_rate": 8.275757514559748e-06, "loss": 0.8807, "step": 8336 }, { "epoch": 0.2944778775128152, "grad_norm": 1.9410840272903442, "learning_rate": 8.275325341077886e-06, "loss": 0.8366, "step": 8337 }, { "epoch": 0.2945131993165231, "grad_norm": 1.7191002368927002, "learning_rate": 8.274893124729181e-06, "loss": 0.7967, "step": 8338 }, { "epoch": 0.294548521120231, "grad_norm": 1.624937891960144, "learning_rate": 8.274460865519293e-06, "loss": 0.8543, "step": 8339 }, { "epoch": 0.2945838429239389, "grad_norm": 1.6180775165557861, "learning_rate": 8.274028563453876e-06, "loss": 0.8034, "step": 8340 }, { "epoch": 0.2946191647276468, "grad_norm": 1.7608520984649658, "learning_rate": 8.273596218538589e-06, "loss": 0.8708, "step": 8341 }, { "epoch": 0.29465448653135473, "grad_norm": 1.7954919338226318, "learning_rate": 8.27316383077909e-06, "loss": 0.8109, "step": 8342 }, { "epoch": 0.29468980833506264, "grad_norm": 1.7590054273605347, "learning_rate": 8.272731400181037e-06, "loss": 0.8675, "step": 8343 }, { "epoch": 0.29472513013877055, "grad_norm": 1.668928861618042, "learning_rate": 8.272298926750091e-06, "loss": 0.8229, "step": 8344 }, { "epoch": 0.29476045194247846, "grad_norm": 1.7423964738845825, "learning_rate": 8.271866410491912e-06, "loss": 0.8123, "step": 8345 }, { "epoch": 0.2947957737461864, "grad_norm": 1.871336579322815, "learning_rate": 8.27143385141216e-06, "loss": 0.8431, "step": 8346 }, { "epoch": 0.29483109554989423, "grad_norm": 1.8941892385482788, "learning_rate": 8.271001249516498e-06, "loss": 0.8366, "step": 8347 }, { "epoch": 0.29486641735360214, "grad_norm": 1.7051255702972412, "learning_rate": 8.270568604810585e-06, "loss": 0.7739, "step": 8348 }, { "epoch": 0.29490173915731005, "grad_norm": 1.8540745973587036, "learning_rate": 8.270135917300086e-06, "loss": 0.814, "step": 8349 }, { "epoch": 0.29493706096101796, "grad_norm": 1.90616774559021, "learning_rate": 8.269703186990663e-06, "loss": 0.7991, "step": 8350 }, { "epoch": 0.29497238276472587, "grad_norm": 1.9872827529907227, "learning_rate": 8.269270413887978e-06, "loss": 0.7799, "step": 8351 }, { "epoch": 0.2950077045684338, "grad_norm": 1.5296498537063599, "learning_rate": 8.268837597997696e-06, "loss": 0.8135, "step": 8352 }, { "epoch": 0.2950430263721417, "grad_norm": 1.7512606382369995, "learning_rate": 8.268404739325486e-06, "loss": 0.8552, "step": 8353 }, { "epoch": 0.2950783481758496, "grad_norm": 1.5983551740646362, "learning_rate": 8.267971837877006e-06, "loss": 0.7919, "step": 8354 }, { "epoch": 0.2951136699795575, "grad_norm": 1.7233591079711914, "learning_rate": 8.267538893657926e-06, "loss": 0.8341, "step": 8355 }, { "epoch": 0.2951489917832654, "grad_norm": 1.7887256145477295, "learning_rate": 8.267105906673909e-06, "loss": 0.8748, "step": 8356 }, { "epoch": 0.29518431358697333, "grad_norm": 1.6136938333511353, "learning_rate": 8.266672876930625e-06, "loss": 0.8011, "step": 8357 }, { "epoch": 0.29521963539068125, "grad_norm": 1.712681770324707, "learning_rate": 8.266239804433742e-06, "loss": 0.8292, "step": 8358 }, { "epoch": 0.29525495719438916, "grad_norm": 1.5554274320602417, "learning_rate": 8.265806689188924e-06, "loss": 0.8169, "step": 8359 }, { "epoch": 0.295290278998097, "grad_norm": 1.7406585216522217, "learning_rate": 8.265373531201844e-06, "loss": 0.8356, "step": 8360 }, { "epoch": 0.2953256008018049, "grad_norm": 1.6281321048736572, "learning_rate": 8.264940330478169e-06, "loss": 0.8333, "step": 8361 }, { "epoch": 0.29536092260551283, "grad_norm": 1.6240124702453613, "learning_rate": 8.264507087023568e-06, "loss": 0.8346, "step": 8362 }, { "epoch": 0.29539624440922074, "grad_norm": 1.7889752388000488, "learning_rate": 8.264073800843711e-06, "loss": 0.8354, "step": 8363 }, { "epoch": 0.29543156621292865, "grad_norm": 1.7660619020462036, "learning_rate": 8.26364047194427e-06, "loss": 0.8069, "step": 8364 }, { "epoch": 0.29546688801663656, "grad_norm": 1.8276515007019043, "learning_rate": 8.263207100330917e-06, "loss": 0.8729, "step": 8365 }, { "epoch": 0.2955022098203445, "grad_norm": 1.7421947717666626, "learning_rate": 8.262773686009322e-06, "loss": 0.8001, "step": 8366 }, { "epoch": 0.2955375316240524, "grad_norm": 1.5130960941314697, "learning_rate": 8.262340228985159e-06, "loss": 0.8156, "step": 8367 }, { "epoch": 0.2955728534277603, "grad_norm": 1.6935385465621948, "learning_rate": 8.2619067292641e-06, "loss": 0.8147, "step": 8368 }, { "epoch": 0.2956081752314682, "grad_norm": 1.6436867713928223, "learning_rate": 8.261473186851819e-06, "loss": 0.7974, "step": 8369 }, { "epoch": 0.2956434970351761, "grad_norm": 1.6690555810928345, "learning_rate": 8.26103960175399e-06, "loss": 0.801, "step": 8370 }, { "epoch": 0.295678818838884, "grad_norm": 1.5944509506225586, "learning_rate": 8.260605973976288e-06, "loss": 0.8142, "step": 8371 }, { "epoch": 0.29571414064259194, "grad_norm": 1.6306167840957642, "learning_rate": 8.260172303524388e-06, "loss": 0.8371, "step": 8372 }, { "epoch": 0.2957494624462998, "grad_norm": 1.9501487016677856, "learning_rate": 8.259738590403966e-06, "loss": 0.8086, "step": 8373 }, { "epoch": 0.2957847842500077, "grad_norm": 1.8935093879699707, "learning_rate": 8.259304834620698e-06, "loss": 0.8289, "step": 8374 }, { "epoch": 0.2958201060537156, "grad_norm": 1.785111665725708, "learning_rate": 8.25887103618026e-06, "loss": 0.8318, "step": 8375 }, { "epoch": 0.2958554278574235, "grad_norm": 0.9936736226081848, "learning_rate": 8.258437195088332e-06, "loss": 0.6213, "step": 8376 }, { "epoch": 0.29589074966113144, "grad_norm": 1.730017066001892, "learning_rate": 8.258003311350588e-06, "loss": 0.9006, "step": 8377 }, { "epoch": 0.29592607146483935, "grad_norm": 1.8270025253295898, "learning_rate": 8.257569384972711e-06, "loss": 0.8204, "step": 8378 }, { "epoch": 0.29596139326854726, "grad_norm": 1.9651753902435303, "learning_rate": 8.257135415960377e-06, "loss": 0.8469, "step": 8379 }, { "epoch": 0.29599671507225517, "grad_norm": 1.4701557159423828, "learning_rate": 8.256701404319268e-06, "loss": 0.817, "step": 8380 }, { "epoch": 0.2960320368759631, "grad_norm": 1.740830421447754, "learning_rate": 8.256267350055065e-06, "loss": 0.8165, "step": 8381 }, { "epoch": 0.296067358679671, "grad_norm": 1.602083444595337, "learning_rate": 8.255833253173444e-06, "loss": 0.8092, "step": 8382 }, { "epoch": 0.2961026804833789, "grad_norm": 1.6154226064682007, "learning_rate": 8.255399113680091e-06, "loss": 0.8144, "step": 8383 }, { "epoch": 0.2961380022870868, "grad_norm": 1.4719200134277344, "learning_rate": 8.254964931580686e-06, "loss": 0.7942, "step": 8384 }, { "epoch": 0.2961733240907947, "grad_norm": 1.6909724473953247, "learning_rate": 8.254530706880911e-06, "loss": 0.8282, "step": 8385 }, { "epoch": 0.2962086458945026, "grad_norm": 1.9771578311920166, "learning_rate": 8.254096439586449e-06, "loss": 0.8499, "step": 8386 }, { "epoch": 0.2962439676982105, "grad_norm": 1.9069703817367554, "learning_rate": 8.253662129702989e-06, "loss": 0.8521, "step": 8387 }, { "epoch": 0.2962792895019184, "grad_norm": 1.6191273927688599, "learning_rate": 8.253227777236206e-06, "loss": 0.8292, "step": 8388 }, { "epoch": 0.2963146113056263, "grad_norm": 1.6821353435516357, "learning_rate": 8.25279338219179e-06, "loss": 0.8293, "step": 8389 }, { "epoch": 0.2963499331093342, "grad_norm": 1.9270511865615845, "learning_rate": 8.25235894457543e-06, "loss": 0.8412, "step": 8390 }, { "epoch": 0.29638525491304213, "grad_norm": 1.6839120388031006, "learning_rate": 8.251924464392802e-06, "loss": 0.8202, "step": 8391 }, { "epoch": 0.29642057671675004, "grad_norm": 1.4747991561889648, "learning_rate": 8.251489941649603e-06, "loss": 0.792, "step": 8392 }, { "epoch": 0.29645589852045795, "grad_norm": 1.533295750617981, "learning_rate": 8.25105537635151e-06, "loss": 0.8437, "step": 8393 }, { "epoch": 0.29649122032416586, "grad_norm": 1.5821993350982666, "learning_rate": 8.250620768504218e-06, "loss": 0.8429, "step": 8394 }, { "epoch": 0.29652654212787377, "grad_norm": 1.5688334703445435, "learning_rate": 8.250186118113411e-06, "loss": 0.8194, "step": 8395 }, { "epoch": 0.2965618639315817, "grad_norm": 1.7389146089553833, "learning_rate": 8.24975142518478e-06, "loss": 0.8352, "step": 8396 }, { "epoch": 0.2965971857352896, "grad_norm": 1.5815844535827637, "learning_rate": 8.249316689724014e-06, "loss": 0.8602, "step": 8397 }, { "epoch": 0.2966325075389975, "grad_norm": 1.5933064222335815, "learning_rate": 8.248881911736801e-06, "loss": 0.813, "step": 8398 }, { "epoch": 0.29666782934270536, "grad_norm": 1.4958930015563965, "learning_rate": 8.248447091228832e-06, "loss": 0.8376, "step": 8399 }, { "epoch": 0.29670315114641327, "grad_norm": 1.7709203958511353, "learning_rate": 8.2480122282058e-06, "loss": 0.8646, "step": 8400 }, { "epoch": 0.2967384729501212, "grad_norm": 1.766200065612793, "learning_rate": 8.247577322673392e-06, "loss": 0.8382, "step": 8401 }, { "epoch": 0.2967737947538291, "grad_norm": 1.6429579257965088, "learning_rate": 8.247142374637303e-06, "loss": 0.8594, "step": 8402 }, { "epoch": 0.296809116557537, "grad_norm": 1.6611018180847168, "learning_rate": 8.246707384103225e-06, "loss": 0.8478, "step": 8403 }, { "epoch": 0.2968444383612449, "grad_norm": 1.5419474840164185, "learning_rate": 8.246272351076852e-06, "loss": 0.8232, "step": 8404 }, { "epoch": 0.2968797601649528, "grad_norm": 2.2619733810424805, "learning_rate": 8.245837275563876e-06, "loss": 0.83, "step": 8405 }, { "epoch": 0.29691508196866073, "grad_norm": 1.546459674835205, "learning_rate": 8.245402157569993e-06, "loss": 0.8042, "step": 8406 }, { "epoch": 0.29695040377236864, "grad_norm": 1.8390660285949707, "learning_rate": 8.244966997100895e-06, "loss": 0.8098, "step": 8407 }, { "epoch": 0.29698572557607655, "grad_norm": 1.6860671043395996, "learning_rate": 8.244531794162281e-06, "loss": 0.8519, "step": 8408 }, { "epoch": 0.29702104737978446, "grad_norm": 1.5447863340377808, "learning_rate": 8.244096548759843e-06, "loss": 0.8315, "step": 8409 }, { "epoch": 0.2970563691834924, "grad_norm": 1.7449491024017334, "learning_rate": 8.243661260899279e-06, "loss": 0.8085, "step": 8410 }, { "epoch": 0.2970916909872003, "grad_norm": 1.636536717414856, "learning_rate": 8.243225930586287e-06, "loss": 0.826, "step": 8411 }, { "epoch": 0.29712701279090814, "grad_norm": 1.782814860343933, "learning_rate": 8.242790557826564e-06, "loss": 0.8439, "step": 8412 }, { "epoch": 0.29716233459461605, "grad_norm": 1.7362747192382812, "learning_rate": 8.242355142625806e-06, "loss": 0.8433, "step": 8413 }, { "epoch": 0.29719765639832396, "grad_norm": 1.5392065048217773, "learning_rate": 8.241919684989715e-06, "loss": 0.7913, "step": 8414 }, { "epoch": 0.2972329782020319, "grad_norm": 2.108696937561035, "learning_rate": 8.24148418492399e-06, "loss": 0.87, "step": 8415 }, { "epoch": 0.2972683000057398, "grad_norm": 1.803601861000061, "learning_rate": 8.241048642434327e-06, "loss": 0.7992, "step": 8416 }, { "epoch": 0.2973036218094477, "grad_norm": 1.545880913734436, "learning_rate": 8.240613057526428e-06, "loss": 0.7936, "step": 8417 }, { "epoch": 0.2973389436131556, "grad_norm": 1.7163020372390747, "learning_rate": 8.240177430205998e-06, "loss": 0.8347, "step": 8418 }, { "epoch": 0.2973742654168635, "grad_norm": 1.6088526248931885, "learning_rate": 8.23974176047873e-06, "loss": 0.8107, "step": 8419 }, { "epoch": 0.2974095872205714, "grad_norm": 1.646904706954956, "learning_rate": 8.239306048350335e-06, "loss": 0.8282, "step": 8420 }, { "epoch": 0.29744490902427934, "grad_norm": 1.63209068775177, "learning_rate": 8.238870293826508e-06, "loss": 0.8491, "step": 8421 }, { "epoch": 0.29748023082798725, "grad_norm": 1.5998698472976685, "learning_rate": 8.238434496912958e-06, "loss": 0.8501, "step": 8422 }, { "epoch": 0.29751555263169516, "grad_norm": 1.681777834892273, "learning_rate": 8.237998657615385e-06, "loss": 0.8574, "step": 8423 }, { "epoch": 0.29755087443540307, "grad_norm": 1.468678593635559, "learning_rate": 8.237562775939495e-06, "loss": 0.8688, "step": 8424 }, { "epoch": 0.2975861962391109, "grad_norm": 1.4868251085281372, "learning_rate": 8.23712685189099e-06, "loss": 0.827, "step": 8425 }, { "epoch": 0.29762151804281883, "grad_norm": 1.5906141996383667, "learning_rate": 8.23669088547558e-06, "loss": 0.8042, "step": 8426 }, { "epoch": 0.29765683984652674, "grad_norm": 1.523249864578247, "learning_rate": 8.236254876698969e-06, "loss": 0.8521, "step": 8427 }, { "epoch": 0.29769216165023465, "grad_norm": 1.591749668121338, "learning_rate": 8.23581882556686e-06, "loss": 0.8149, "step": 8428 }, { "epoch": 0.29772748345394257, "grad_norm": 1.6037678718566895, "learning_rate": 8.235382732084962e-06, "loss": 0.8104, "step": 8429 }, { "epoch": 0.2977628052576505, "grad_norm": 1.6207584142684937, "learning_rate": 8.234946596258984e-06, "loss": 0.8404, "step": 8430 }, { "epoch": 0.2977981270613584, "grad_norm": 1.856522798538208, "learning_rate": 8.234510418094632e-06, "loss": 0.8125, "step": 8431 }, { "epoch": 0.2978334488650663, "grad_norm": 1.7528389692306519, "learning_rate": 8.234074197597616e-06, "loss": 0.8412, "step": 8432 }, { "epoch": 0.2978687706687742, "grad_norm": 1.6752954721450806, "learning_rate": 8.233637934773646e-06, "loss": 0.7549, "step": 8433 }, { "epoch": 0.2979040924724821, "grad_norm": 1.8371227979660034, "learning_rate": 8.23320162962843e-06, "loss": 0.8417, "step": 8434 }, { "epoch": 0.29793941427619003, "grad_norm": 1.7702510356903076, "learning_rate": 8.232765282167679e-06, "loss": 0.8175, "step": 8435 }, { "epoch": 0.29797473607989794, "grad_norm": 1.4644699096679688, "learning_rate": 8.232328892397104e-06, "loss": 0.8226, "step": 8436 }, { "epoch": 0.29801005788360585, "grad_norm": 1.908339500427246, "learning_rate": 8.231892460322415e-06, "loss": 0.8473, "step": 8437 }, { "epoch": 0.2980453796873137, "grad_norm": 1.576159954071045, "learning_rate": 8.231455985949326e-06, "loss": 0.8353, "step": 8438 }, { "epoch": 0.2980807014910216, "grad_norm": 1.5449819564819336, "learning_rate": 8.231019469283548e-06, "loss": 0.8458, "step": 8439 }, { "epoch": 0.2981160232947295, "grad_norm": 1.6652636528015137, "learning_rate": 8.230582910330796e-06, "loss": 0.8189, "step": 8440 }, { "epoch": 0.29815134509843744, "grad_norm": 1.6994463205337524, "learning_rate": 8.23014630909678e-06, "loss": 0.8346, "step": 8441 }, { "epoch": 0.29818666690214535, "grad_norm": 1.7828702926635742, "learning_rate": 8.229709665587217e-06, "loss": 0.8268, "step": 8442 }, { "epoch": 0.29822198870585326, "grad_norm": 1.5800166130065918, "learning_rate": 8.229272979807821e-06, "loss": 0.8423, "step": 8443 }, { "epoch": 0.29825731050956117, "grad_norm": 1.7062108516693115, "learning_rate": 8.228836251764308e-06, "loss": 0.8744, "step": 8444 }, { "epoch": 0.2982926323132691, "grad_norm": 1.5638896226882935, "learning_rate": 8.228399481462392e-06, "loss": 0.8249, "step": 8445 }, { "epoch": 0.298327954116977, "grad_norm": 1.8573930263519287, "learning_rate": 8.227962668907793e-06, "loss": 0.8498, "step": 8446 }, { "epoch": 0.2983632759206849, "grad_norm": 1.5935344696044922, "learning_rate": 8.22752581410622e-06, "loss": 0.7948, "step": 8447 }, { "epoch": 0.2983985977243928, "grad_norm": 1.7069580554962158, "learning_rate": 8.2270889170634e-06, "loss": 0.81, "step": 8448 }, { "epoch": 0.2984339195281007, "grad_norm": 1.6344153881072998, "learning_rate": 8.226651977785046e-06, "loss": 0.8208, "step": 8449 }, { "epoch": 0.29846924133180863, "grad_norm": 1.8482650518417358, "learning_rate": 8.226214996276877e-06, "loss": 0.8525, "step": 8450 }, { "epoch": 0.2985045631355165, "grad_norm": 1.6844782829284668, "learning_rate": 8.225777972544614e-06, "loss": 0.8003, "step": 8451 }, { "epoch": 0.2985398849392244, "grad_norm": 1.6299998760223389, "learning_rate": 8.225340906593975e-06, "loss": 0.8139, "step": 8452 }, { "epoch": 0.2985752067429323, "grad_norm": 1.866796851158142, "learning_rate": 8.22490379843068e-06, "loss": 0.8235, "step": 8453 }, { "epoch": 0.2986105285466402, "grad_norm": 1.6222394704818726, "learning_rate": 8.22446664806045e-06, "loss": 0.823, "step": 8454 }, { "epoch": 0.29864585035034813, "grad_norm": 1.6204805374145508, "learning_rate": 8.224029455489007e-06, "loss": 0.8226, "step": 8455 }, { "epoch": 0.29868117215405604, "grad_norm": 1.7552505731582642, "learning_rate": 8.223592220722072e-06, "loss": 0.9083, "step": 8456 }, { "epoch": 0.29871649395776395, "grad_norm": 1.794349193572998, "learning_rate": 8.223154943765367e-06, "loss": 0.8241, "step": 8457 }, { "epoch": 0.29875181576147186, "grad_norm": 1.7667245864868164, "learning_rate": 8.22271762462462e-06, "loss": 0.8384, "step": 8458 }, { "epoch": 0.2987871375651798, "grad_norm": 1.4793050289154053, "learning_rate": 8.222280263305546e-06, "loss": 0.7814, "step": 8459 }, { "epoch": 0.2988224593688877, "grad_norm": 1.6667664051055908, "learning_rate": 8.221842859813876e-06, "loss": 0.8403, "step": 8460 }, { "epoch": 0.2988577811725956, "grad_norm": 1.7505029439926147, "learning_rate": 8.221405414155332e-06, "loss": 0.853, "step": 8461 }, { "epoch": 0.2988931029763035, "grad_norm": 1.887473225593567, "learning_rate": 8.220967926335639e-06, "loss": 0.8475, "step": 8462 }, { "epoch": 0.2989284247800114, "grad_norm": 1.7441141605377197, "learning_rate": 8.220530396360524e-06, "loss": 0.8247, "step": 8463 }, { "epoch": 0.2989637465837193, "grad_norm": 1.7911391258239746, "learning_rate": 8.220092824235712e-06, "loss": 0.8234, "step": 8464 }, { "epoch": 0.2989990683874272, "grad_norm": 1.6236374378204346, "learning_rate": 8.21965520996693e-06, "loss": 0.8396, "step": 8465 }, { "epoch": 0.2990343901911351, "grad_norm": 1.668289065361023, "learning_rate": 8.219217553559908e-06, "loss": 0.8404, "step": 8466 }, { "epoch": 0.299069711994843, "grad_norm": 1.6397038698196411, "learning_rate": 8.218779855020369e-06, "loss": 0.826, "step": 8467 }, { "epoch": 0.2991050337985509, "grad_norm": 1.7588859796524048, "learning_rate": 8.218342114354046e-06, "loss": 0.868, "step": 8468 }, { "epoch": 0.2991403556022588, "grad_norm": 1.6575651168823242, "learning_rate": 8.217904331566667e-06, "loss": 0.8396, "step": 8469 }, { "epoch": 0.29917567740596673, "grad_norm": 1.7634154558181763, "learning_rate": 8.217466506663958e-06, "loss": 0.8264, "step": 8470 }, { "epoch": 0.29921099920967464, "grad_norm": 1.7091373205184937, "learning_rate": 8.217028639651654e-06, "loss": 0.856, "step": 8471 }, { "epoch": 0.29924632101338255, "grad_norm": 1.658708930015564, "learning_rate": 8.216590730535486e-06, "loss": 0.816, "step": 8472 }, { "epoch": 0.29928164281709047, "grad_norm": 1.7341182231903076, "learning_rate": 8.21615277932118e-06, "loss": 0.815, "step": 8473 }, { "epoch": 0.2993169646207984, "grad_norm": 1.5890672206878662, "learning_rate": 8.215714786014472e-06, "loss": 0.8173, "step": 8474 }, { "epoch": 0.2993522864245063, "grad_norm": 1.904799461364746, "learning_rate": 8.215276750621094e-06, "loss": 0.8364, "step": 8475 }, { "epoch": 0.2993876082282142, "grad_norm": 1.7504141330718994, "learning_rate": 8.214838673146777e-06, "loss": 0.8607, "step": 8476 }, { "epoch": 0.2994229300319221, "grad_norm": 1.763489007949829, "learning_rate": 8.214400553597257e-06, "loss": 0.8251, "step": 8477 }, { "epoch": 0.29945825183562996, "grad_norm": 1.5927356481552124, "learning_rate": 8.213962391978266e-06, "loss": 0.8346, "step": 8478 }, { "epoch": 0.2994935736393379, "grad_norm": 1.74683678150177, "learning_rate": 8.21352418829554e-06, "loss": 0.8665, "step": 8479 }, { "epoch": 0.2995288954430458, "grad_norm": 1.6750330924987793, "learning_rate": 8.213085942554814e-06, "loss": 0.8375, "step": 8480 }, { "epoch": 0.2995642172467537, "grad_norm": 1.6957485675811768, "learning_rate": 8.212647654761822e-06, "loss": 0.8381, "step": 8481 }, { "epoch": 0.2995995390504616, "grad_norm": 1.5852943658828735, "learning_rate": 8.212209324922303e-06, "loss": 0.8054, "step": 8482 }, { "epoch": 0.2996348608541695, "grad_norm": 1.7915464639663696, "learning_rate": 8.211770953041992e-06, "loss": 0.8385, "step": 8483 }, { "epoch": 0.2996701826578774, "grad_norm": 1.6719655990600586, "learning_rate": 8.211332539126625e-06, "loss": 0.8307, "step": 8484 }, { "epoch": 0.29970550446158534, "grad_norm": 1.692663311958313, "learning_rate": 8.210894083181942e-06, "loss": 0.8361, "step": 8485 }, { "epoch": 0.29974082626529325, "grad_norm": 1.7462244033813477, "learning_rate": 8.21045558521368e-06, "loss": 0.8595, "step": 8486 }, { "epoch": 0.29977614806900116, "grad_norm": 1.724311113357544, "learning_rate": 8.21001704522758e-06, "loss": 0.8055, "step": 8487 }, { "epoch": 0.29981146987270907, "grad_norm": 1.8467684984207153, "learning_rate": 8.20957846322938e-06, "loss": 0.8309, "step": 8488 }, { "epoch": 0.299846791676417, "grad_norm": 1.815692663192749, "learning_rate": 8.209139839224819e-06, "loss": 0.8087, "step": 8489 }, { "epoch": 0.2998821134801249, "grad_norm": 1.7097231149673462, "learning_rate": 8.20870117321964e-06, "loss": 0.8707, "step": 8490 }, { "epoch": 0.29991743528383275, "grad_norm": 1.6441854238510132, "learning_rate": 8.208262465219585e-06, "loss": 0.831, "step": 8491 }, { "epoch": 0.29995275708754066, "grad_norm": 1.9134935140609741, "learning_rate": 8.207823715230391e-06, "loss": 0.8389, "step": 8492 }, { "epoch": 0.29998807889124857, "grad_norm": 1.6392502784729004, "learning_rate": 8.207384923257806e-06, "loss": 0.8205, "step": 8493 }, { "epoch": 0.3000234006949565, "grad_norm": 1.544368863105774, "learning_rate": 8.206946089307568e-06, "loss": 0.8156, "step": 8494 }, { "epoch": 0.3000587224986644, "grad_norm": 1.4742623567581177, "learning_rate": 8.206507213385424e-06, "loss": 0.8144, "step": 8495 }, { "epoch": 0.3000940443023723, "grad_norm": 1.6479737758636475, "learning_rate": 8.206068295497117e-06, "loss": 0.8508, "step": 8496 }, { "epoch": 0.3001293661060802, "grad_norm": 1.6557857990264893, "learning_rate": 8.205629335648387e-06, "loss": 0.8461, "step": 8497 }, { "epoch": 0.3001646879097881, "grad_norm": 1.74299955368042, "learning_rate": 8.205190333844985e-06, "loss": 0.8198, "step": 8498 }, { "epoch": 0.30020000971349603, "grad_norm": 1.6583975553512573, "learning_rate": 8.204751290092656e-06, "loss": 0.7949, "step": 8499 }, { "epoch": 0.30023533151720394, "grad_norm": 1.7787730693817139, "learning_rate": 8.204312204397143e-06, "loss": 0.8518, "step": 8500 }, { "epoch": 0.30027065332091185, "grad_norm": 1.8062504529953003, "learning_rate": 8.203873076764194e-06, "loss": 0.8612, "step": 8501 }, { "epoch": 0.30030597512461976, "grad_norm": 1.9355181455612183, "learning_rate": 8.203433907199558e-06, "loss": 0.8462, "step": 8502 }, { "epoch": 0.3003412969283277, "grad_norm": 1.7488154172897339, "learning_rate": 8.20299469570898e-06, "loss": 0.8161, "step": 8503 }, { "epoch": 0.3003766187320355, "grad_norm": 1.7976422309875488, "learning_rate": 8.20255544229821e-06, "loss": 0.84, "step": 8504 }, { "epoch": 0.30041194053574344, "grad_norm": 1.6253690719604492, "learning_rate": 8.202116146972994e-06, "loss": 0.8326, "step": 8505 }, { "epoch": 0.30044726233945135, "grad_norm": 1.8801605701446533, "learning_rate": 8.201676809739085e-06, "loss": 0.8286, "step": 8506 }, { "epoch": 0.30048258414315926, "grad_norm": 1.7244960069656372, "learning_rate": 8.201237430602233e-06, "loss": 0.8223, "step": 8507 }, { "epoch": 0.30051790594686717, "grad_norm": 1.7220851182937622, "learning_rate": 8.200798009568187e-06, "loss": 0.8333, "step": 8508 }, { "epoch": 0.3005532277505751, "grad_norm": 1.6662462949752808, "learning_rate": 8.200358546642698e-06, "loss": 0.8254, "step": 8509 }, { "epoch": 0.300588549554283, "grad_norm": 1.6894077062606812, "learning_rate": 8.19991904183152e-06, "loss": 0.8531, "step": 8510 }, { "epoch": 0.3006238713579909, "grad_norm": 1.871077060699463, "learning_rate": 8.1994794951404e-06, "loss": 0.8391, "step": 8511 }, { "epoch": 0.3006591931616988, "grad_norm": 1.796520709991455, "learning_rate": 8.199039906575095e-06, "loss": 0.8298, "step": 8512 }, { "epoch": 0.3006945149654067, "grad_norm": 1.6266140937805176, "learning_rate": 8.198600276141358e-06, "loss": 0.8254, "step": 8513 }, { "epoch": 0.30072983676911463, "grad_norm": 1.6552300453186035, "learning_rate": 8.19816060384494e-06, "loss": 0.8039, "step": 8514 }, { "epoch": 0.30076515857282254, "grad_norm": 1.7468920946121216, "learning_rate": 8.197720889691597e-06, "loss": 0.8196, "step": 8515 }, { "epoch": 0.30080048037653045, "grad_norm": 1.8599604368209839, "learning_rate": 8.197281133687086e-06, "loss": 0.8328, "step": 8516 }, { "epoch": 0.3008358021802383, "grad_norm": 1.8012751340866089, "learning_rate": 8.19684133583716e-06, "loss": 0.82, "step": 8517 }, { "epoch": 0.3008711239839462, "grad_norm": 1.803176999092102, "learning_rate": 8.196401496147575e-06, "loss": 0.8276, "step": 8518 }, { "epoch": 0.30090644578765413, "grad_norm": 1.7525357007980347, "learning_rate": 8.195961614624089e-06, "loss": 0.8587, "step": 8519 }, { "epoch": 0.30094176759136204, "grad_norm": 2.0279481410980225, "learning_rate": 8.195521691272457e-06, "loss": 0.8507, "step": 8520 }, { "epoch": 0.30097708939506995, "grad_norm": 2.129362106323242, "learning_rate": 8.195081726098438e-06, "loss": 0.8352, "step": 8521 }, { "epoch": 0.30101241119877786, "grad_norm": 2.2490363121032715, "learning_rate": 8.19464171910779e-06, "loss": 0.8011, "step": 8522 }, { "epoch": 0.3010477330024858, "grad_norm": 1.6628018617630005, "learning_rate": 8.194201670306272e-06, "loss": 0.7767, "step": 8523 }, { "epoch": 0.3010830548061937, "grad_norm": 1.7109755277633667, "learning_rate": 8.193761579699643e-06, "loss": 0.8427, "step": 8524 }, { "epoch": 0.3011183766099016, "grad_norm": 1.9583137035369873, "learning_rate": 8.193321447293663e-06, "loss": 0.8432, "step": 8525 }, { "epoch": 0.3011536984136095, "grad_norm": 1.9660120010375977, "learning_rate": 8.192881273094092e-06, "loss": 0.7923, "step": 8526 }, { "epoch": 0.3011890202173174, "grad_norm": 1.8547240495681763, "learning_rate": 8.19244105710669e-06, "loss": 0.8296, "step": 8527 }, { "epoch": 0.3012243420210253, "grad_norm": 1.574465274810791, "learning_rate": 8.192000799337222e-06, "loss": 0.8028, "step": 8528 }, { "epoch": 0.30125966382473324, "grad_norm": 1.7784984111785889, "learning_rate": 8.191560499791447e-06, "loss": 0.8333, "step": 8529 }, { "epoch": 0.3012949856284411, "grad_norm": 1.667521595954895, "learning_rate": 8.191120158475126e-06, "loss": 0.8136, "step": 8530 }, { "epoch": 0.301330307432149, "grad_norm": 1.700240135192871, "learning_rate": 8.190679775394026e-06, "loss": 0.8653, "step": 8531 }, { "epoch": 0.3013656292358569, "grad_norm": 1.7902511358261108, "learning_rate": 8.190239350553908e-06, "loss": 0.8552, "step": 8532 }, { "epoch": 0.3014009510395648, "grad_norm": 1.5600091218948364, "learning_rate": 8.189798883960538e-06, "loss": 0.8351, "step": 8533 }, { "epoch": 0.30143627284327273, "grad_norm": 1.7275927066802979, "learning_rate": 8.189358375619678e-06, "loss": 0.834, "step": 8534 }, { "epoch": 0.30147159464698065, "grad_norm": 1.6715705394744873, "learning_rate": 8.188917825537098e-06, "loss": 0.8278, "step": 8535 }, { "epoch": 0.30150691645068856, "grad_norm": 1.6518622636795044, "learning_rate": 8.188477233718559e-06, "loss": 0.8179, "step": 8536 }, { "epoch": 0.30154223825439647, "grad_norm": 1.5988095998764038, "learning_rate": 8.188036600169828e-06, "loss": 0.8419, "step": 8537 }, { "epoch": 0.3015775600581044, "grad_norm": 1.7271628379821777, "learning_rate": 8.187595924896677e-06, "loss": 0.8229, "step": 8538 }, { "epoch": 0.3016128818618123, "grad_norm": 1.7795634269714355, "learning_rate": 8.187155207904867e-06, "loss": 0.8583, "step": 8539 }, { "epoch": 0.3016482036655202, "grad_norm": 1.659452199935913, "learning_rate": 8.18671444920017e-06, "loss": 0.8117, "step": 8540 }, { "epoch": 0.3016835254692281, "grad_norm": 1.6425977945327759, "learning_rate": 8.186273648788351e-06, "loss": 0.808, "step": 8541 }, { "epoch": 0.301718847272936, "grad_norm": 1.5946342945098877, "learning_rate": 8.185832806675185e-06, "loss": 0.8256, "step": 8542 }, { "epoch": 0.3017541690766439, "grad_norm": 1.7022926807403564, "learning_rate": 8.185391922866437e-06, "loss": 0.8052, "step": 8543 }, { "epoch": 0.3017894908803518, "grad_norm": 1.652121901512146, "learning_rate": 8.184950997367877e-06, "loss": 0.8125, "step": 8544 }, { "epoch": 0.3018248126840597, "grad_norm": 1.8661531209945679, "learning_rate": 8.184510030185278e-06, "loss": 0.8104, "step": 8545 }, { "epoch": 0.3018601344877676, "grad_norm": 1.7059963941574097, "learning_rate": 8.18406902132441e-06, "loss": 0.8316, "step": 8546 }, { "epoch": 0.3018954562914755, "grad_norm": 1.7555172443389893, "learning_rate": 8.183627970791045e-06, "loss": 0.7997, "step": 8547 }, { "epoch": 0.3019307780951834, "grad_norm": 1.9949787855148315, "learning_rate": 8.183186878590957e-06, "loss": 0.8334, "step": 8548 }, { "epoch": 0.30196609989889134, "grad_norm": 1.684691309928894, "learning_rate": 8.182745744729914e-06, "loss": 0.824, "step": 8549 }, { "epoch": 0.30200142170259925, "grad_norm": 1.776337742805481, "learning_rate": 8.182304569213697e-06, "loss": 0.8412, "step": 8550 }, { "epoch": 0.30203674350630716, "grad_norm": 1.622964859008789, "learning_rate": 8.181863352048075e-06, "loss": 0.8508, "step": 8551 }, { "epoch": 0.30207206531001507, "grad_norm": 1.772133469581604, "learning_rate": 8.181422093238821e-06, "loss": 0.827, "step": 8552 }, { "epoch": 0.302107387113723, "grad_norm": 2.007913589477539, "learning_rate": 8.180980792791716e-06, "loss": 0.82, "step": 8553 }, { "epoch": 0.3021427089174309, "grad_norm": 1.6915735006332397, "learning_rate": 8.18053945071253e-06, "loss": 0.8414, "step": 8554 }, { "epoch": 0.3021780307211388, "grad_norm": 1.6847498416900635, "learning_rate": 8.180098067007041e-06, "loss": 0.8381, "step": 8555 }, { "epoch": 0.30221335252484666, "grad_norm": 1.844395399093628, "learning_rate": 8.17965664168103e-06, "loss": 0.8673, "step": 8556 }, { "epoch": 0.30224867432855457, "grad_norm": 1.574517011642456, "learning_rate": 8.179215174740267e-06, "loss": 0.8432, "step": 8557 }, { "epoch": 0.3022839961322625, "grad_norm": 1.6105905771255493, "learning_rate": 8.178773666190535e-06, "loss": 0.8383, "step": 8558 }, { "epoch": 0.3023193179359704, "grad_norm": 1.6249487400054932, "learning_rate": 8.17833211603761e-06, "loss": 0.8094, "step": 8559 }, { "epoch": 0.3023546397396783, "grad_norm": 1.4331631660461426, "learning_rate": 8.177890524287273e-06, "loss": 0.7783, "step": 8560 }, { "epoch": 0.3023899615433862, "grad_norm": 1.7297443151474, "learning_rate": 8.1774488909453e-06, "loss": 0.8235, "step": 8561 }, { "epoch": 0.3024252833470941, "grad_norm": 1.6974438428878784, "learning_rate": 8.177007216017477e-06, "loss": 0.8688, "step": 8562 }, { "epoch": 0.30246060515080203, "grad_norm": 1.6561366319656372, "learning_rate": 8.176565499509576e-06, "loss": 0.7984, "step": 8563 }, { "epoch": 0.30249592695450994, "grad_norm": 1.5958834886550903, "learning_rate": 8.176123741427386e-06, "loss": 0.8387, "step": 8564 }, { "epoch": 0.30253124875821785, "grad_norm": 1.6312446594238281, "learning_rate": 8.175681941776686e-06, "loss": 0.8033, "step": 8565 }, { "epoch": 0.30256657056192576, "grad_norm": 1.8766875267028809, "learning_rate": 8.175240100563257e-06, "loss": 0.8094, "step": 8566 }, { "epoch": 0.3026018923656337, "grad_norm": 1.5668659210205078, "learning_rate": 8.17479821779288e-06, "loss": 0.7911, "step": 8567 }, { "epoch": 0.3026372141693416, "grad_norm": 1.7192938327789307, "learning_rate": 8.174356293471343e-06, "loss": 0.8315, "step": 8568 }, { "epoch": 0.30267253597304944, "grad_norm": 1.816512107849121, "learning_rate": 8.173914327604429e-06, "loss": 0.8406, "step": 8569 }, { "epoch": 0.30270785777675735, "grad_norm": 1.7225195169448853, "learning_rate": 8.173472320197918e-06, "loss": 0.8359, "step": 8570 }, { "epoch": 0.30274317958046526, "grad_norm": 1.8763716220855713, "learning_rate": 8.173030271257599e-06, "loss": 0.85, "step": 8571 }, { "epoch": 0.30277850138417317, "grad_norm": 1.8922945261001587, "learning_rate": 8.172588180789256e-06, "loss": 0.837, "step": 8572 }, { "epoch": 0.3028138231878811, "grad_norm": 1.8484416007995605, "learning_rate": 8.172146048798674e-06, "loss": 0.837, "step": 8573 }, { "epoch": 0.302849144991589, "grad_norm": 1.7386258840560913, "learning_rate": 8.171703875291645e-06, "loss": 0.8192, "step": 8574 }, { "epoch": 0.3028844667952969, "grad_norm": 1.7625075578689575, "learning_rate": 8.171261660273949e-06, "loss": 0.826, "step": 8575 }, { "epoch": 0.3029197885990048, "grad_norm": 1.5350545644760132, "learning_rate": 8.170819403751377e-06, "loss": 0.8019, "step": 8576 }, { "epoch": 0.3029551104027127, "grad_norm": 1.722349762916565, "learning_rate": 8.170377105729716e-06, "loss": 0.8362, "step": 8577 }, { "epoch": 0.30299043220642063, "grad_norm": 1.6895277500152588, "learning_rate": 8.169934766214755e-06, "loss": 0.8307, "step": 8578 }, { "epoch": 0.30302575401012855, "grad_norm": 1.6275862455368042, "learning_rate": 8.169492385212285e-06, "loss": 0.8199, "step": 8579 }, { "epoch": 0.30306107581383646, "grad_norm": 1.5051367282867432, "learning_rate": 8.169049962728093e-06, "loss": 0.8206, "step": 8580 }, { "epoch": 0.30309639761754437, "grad_norm": 1.6685450077056885, "learning_rate": 8.168607498767972e-06, "loss": 0.8455, "step": 8581 }, { "epoch": 0.3031317194212522, "grad_norm": 1.6767233610153198, "learning_rate": 8.168164993337713e-06, "loss": 0.8254, "step": 8582 }, { "epoch": 0.30316704122496013, "grad_norm": 1.8955343961715698, "learning_rate": 8.167722446443104e-06, "loss": 0.8199, "step": 8583 }, { "epoch": 0.30320236302866804, "grad_norm": 1.6019431352615356, "learning_rate": 8.16727985808994e-06, "loss": 0.8422, "step": 8584 }, { "epoch": 0.30323768483237595, "grad_norm": 1.7404073476791382, "learning_rate": 8.166837228284013e-06, "loss": 0.8436, "step": 8585 }, { "epoch": 0.30327300663608386, "grad_norm": 1.6588232517242432, "learning_rate": 8.166394557031115e-06, "loss": 0.8148, "step": 8586 }, { "epoch": 0.3033083284397918, "grad_norm": 1.8441007137298584, "learning_rate": 8.165951844337038e-06, "loss": 0.8243, "step": 8587 }, { "epoch": 0.3033436502434997, "grad_norm": 1.5833094120025635, "learning_rate": 8.165509090207582e-06, "loss": 0.8725, "step": 8588 }, { "epoch": 0.3033789720472076, "grad_norm": 1.6592307090759277, "learning_rate": 8.165066294648537e-06, "loss": 0.8595, "step": 8589 }, { "epoch": 0.3034142938509155, "grad_norm": 1.601995587348938, "learning_rate": 8.164623457665697e-06, "loss": 0.8165, "step": 8590 }, { "epoch": 0.3034496156546234, "grad_norm": 1.6908806562423706, "learning_rate": 8.164180579264863e-06, "loss": 0.843, "step": 8591 }, { "epoch": 0.3034849374583313, "grad_norm": 1.6265575885772705, "learning_rate": 8.163737659451827e-06, "loss": 0.8526, "step": 8592 }, { "epoch": 0.30352025926203924, "grad_norm": 1.6883213520050049, "learning_rate": 8.16329469823239e-06, "loss": 0.8123, "step": 8593 }, { "epoch": 0.30355558106574715, "grad_norm": 1.7942472696304321, "learning_rate": 8.162851695612342e-06, "loss": 0.8396, "step": 8594 }, { "epoch": 0.303590902869455, "grad_norm": 1.6990735530853271, "learning_rate": 8.16240865159749e-06, "loss": 0.7997, "step": 8595 }, { "epoch": 0.3036262246731629, "grad_norm": 1.8360322713851929, "learning_rate": 8.161965566193626e-06, "loss": 0.8109, "step": 8596 }, { "epoch": 0.3036615464768708, "grad_norm": 0.9371167421340942, "learning_rate": 8.161522439406549e-06, "loss": 0.5638, "step": 8597 }, { "epoch": 0.30369686828057874, "grad_norm": 1.455384373664856, "learning_rate": 8.161079271242062e-06, "loss": 0.7998, "step": 8598 }, { "epoch": 0.30373219008428665, "grad_norm": 1.7406747341156006, "learning_rate": 8.160636061705964e-06, "loss": 0.8312, "step": 8599 }, { "epoch": 0.30376751188799456, "grad_norm": 1.599649429321289, "learning_rate": 8.160192810804057e-06, "loss": 0.8458, "step": 8600 }, { "epoch": 0.30380283369170247, "grad_norm": 1.6634329557418823, "learning_rate": 8.159749518542139e-06, "loss": 0.8109, "step": 8601 }, { "epoch": 0.3038381554954104, "grad_norm": 1.6574523448944092, "learning_rate": 8.159306184926013e-06, "loss": 0.8615, "step": 8602 }, { "epoch": 0.3038734772991183, "grad_norm": 1.478914737701416, "learning_rate": 8.158862809961483e-06, "loss": 0.7898, "step": 8603 }, { "epoch": 0.3039087991028262, "grad_norm": 1.6741163730621338, "learning_rate": 8.158419393654349e-06, "loss": 0.8086, "step": 8604 }, { "epoch": 0.3039441209065341, "grad_norm": 1.673301339149475, "learning_rate": 8.157975936010416e-06, "loss": 0.8141, "step": 8605 }, { "epoch": 0.303979442710242, "grad_norm": 1.6021064519882202, "learning_rate": 8.157532437035487e-06, "loss": 0.859, "step": 8606 }, { "epoch": 0.30401476451394993, "grad_norm": 1.6213722229003906, "learning_rate": 8.157088896735367e-06, "loss": 0.8605, "step": 8607 }, { "epoch": 0.3040500863176578, "grad_norm": 1.692509412765503, "learning_rate": 8.156645315115862e-06, "loss": 0.843, "step": 8608 }, { "epoch": 0.3040854081213657, "grad_norm": 0.9850798845291138, "learning_rate": 8.156201692182778e-06, "loss": 0.599, "step": 8609 }, { "epoch": 0.3041207299250736, "grad_norm": 1.642137885093689, "learning_rate": 8.155758027941915e-06, "loss": 0.8451, "step": 8610 }, { "epoch": 0.3041560517287815, "grad_norm": 1.744380235671997, "learning_rate": 8.155314322399088e-06, "loss": 0.848, "step": 8611 }, { "epoch": 0.30419137353248943, "grad_norm": 1.705034852027893, "learning_rate": 8.154870575560099e-06, "loss": 0.8494, "step": 8612 }, { "epoch": 0.30422669533619734, "grad_norm": 1.6507762670516968, "learning_rate": 8.154426787430758e-06, "loss": 0.8358, "step": 8613 }, { "epoch": 0.30426201713990525, "grad_norm": 1.8639944791793823, "learning_rate": 8.15398295801687e-06, "loss": 0.8594, "step": 8614 }, { "epoch": 0.30429733894361316, "grad_norm": 1.6464794874191284, "learning_rate": 8.153539087324248e-06, "loss": 0.8105, "step": 8615 }, { "epoch": 0.30433266074732107, "grad_norm": 1.709181785583496, "learning_rate": 8.153095175358698e-06, "loss": 0.8096, "step": 8616 }, { "epoch": 0.304367982551029, "grad_norm": 1.8097132444381714, "learning_rate": 8.152651222126032e-06, "loss": 0.8747, "step": 8617 }, { "epoch": 0.3044033043547369, "grad_norm": 1.7440454959869385, "learning_rate": 8.152207227632058e-06, "loss": 0.8211, "step": 8618 }, { "epoch": 0.3044386261584448, "grad_norm": 1.9081497192382812, "learning_rate": 8.15176319188259e-06, "loss": 0.8516, "step": 8619 }, { "epoch": 0.3044739479621527, "grad_norm": 1.585391640663147, "learning_rate": 8.151319114883436e-06, "loss": 0.8081, "step": 8620 }, { "epoch": 0.30450926976586057, "grad_norm": 1.670538067817688, "learning_rate": 8.150874996640412e-06, "loss": 0.8176, "step": 8621 }, { "epoch": 0.3045445915695685, "grad_norm": 1.7351728677749634, "learning_rate": 8.150430837159326e-06, "loss": 0.8292, "step": 8622 }, { "epoch": 0.3045799133732764, "grad_norm": 1.851297378540039, "learning_rate": 8.149986636445994e-06, "loss": 0.8314, "step": 8623 }, { "epoch": 0.3046152351769843, "grad_norm": 1.0470541715621948, "learning_rate": 8.14954239450623e-06, "loss": 0.6313, "step": 8624 }, { "epoch": 0.3046505569806922, "grad_norm": 1.6929575204849243, "learning_rate": 8.149098111345848e-06, "loss": 0.8074, "step": 8625 }, { "epoch": 0.3046858787844001, "grad_norm": 1.6457558870315552, "learning_rate": 8.14865378697066e-06, "loss": 0.8054, "step": 8626 }, { "epoch": 0.30472120058810803, "grad_norm": 1.6105877161026, "learning_rate": 8.148209421386485e-06, "loss": 0.8415, "step": 8627 }, { "epoch": 0.30475652239181594, "grad_norm": 2.973700523376465, "learning_rate": 8.147765014599134e-06, "loss": 0.8298, "step": 8628 }, { "epoch": 0.30479184419552385, "grad_norm": 1.8442579507827759, "learning_rate": 8.147320566614429e-06, "loss": 0.8528, "step": 8629 }, { "epoch": 0.30482716599923176, "grad_norm": 1.6159725189208984, "learning_rate": 8.146876077438182e-06, "loss": 0.849, "step": 8630 }, { "epoch": 0.3048624878029397, "grad_norm": 1.598339319229126, "learning_rate": 8.146431547076214e-06, "loss": 0.8216, "step": 8631 }, { "epoch": 0.3048978096066476, "grad_norm": 1.6688380241394043, "learning_rate": 8.145986975534342e-06, "loss": 0.8333, "step": 8632 }, { "epoch": 0.3049331314103555, "grad_norm": 1.5322239398956299, "learning_rate": 8.145542362818382e-06, "loss": 0.8414, "step": 8633 }, { "epoch": 0.30496845321406335, "grad_norm": 1.6355783939361572, "learning_rate": 8.145097708934155e-06, "loss": 0.7925, "step": 8634 }, { "epoch": 0.30500377501777126, "grad_norm": 1.7311323881149292, "learning_rate": 8.14465301388748e-06, "loss": 0.8615, "step": 8635 }, { "epoch": 0.30503909682147917, "grad_norm": 1.7997721433639526, "learning_rate": 8.144208277684177e-06, "loss": 0.8296, "step": 8636 }, { "epoch": 0.3050744186251871, "grad_norm": 1.7442971467971802, "learning_rate": 8.14376350033007e-06, "loss": 0.8478, "step": 8637 }, { "epoch": 0.305109740428895, "grad_norm": 1.7626965045928955, "learning_rate": 8.143318681830977e-06, "loss": 0.8472, "step": 8638 }, { "epoch": 0.3051450622326029, "grad_norm": 1.5218031406402588, "learning_rate": 8.14287382219272e-06, "loss": 0.7928, "step": 8639 }, { "epoch": 0.3051803840363108, "grad_norm": 1.7626312971115112, "learning_rate": 8.142428921421119e-06, "loss": 0.8674, "step": 8640 }, { "epoch": 0.3052157058400187, "grad_norm": 1.6910600662231445, "learning_rate": 8.141983979522e-06, "loss": 0.8098, "step": 8641 }, { "epoch": 0.30525102764372664, "grad_norm": 1.4744848012924194, "learning_rate": 8.141538996501183e-06, "loss": 0.8557, "step": 8642 }, { "epoch": 0.30528634944743455, "grad_norm": 1.594537377357483, "learning_rate": 8.141093972364497e-06, "loss": 0.8191, "step": 8643 }, { "epoch": 0.30532167125114246, "grad_norm": 1.9640448093414307, "learning_rate": 8.140648907117762e-06, "loss": 0.8201, "step": 8644 }, { "epoch": 0.30535699305485037, "grad_norm": 1.7174204587936401, "learning_rate": 8.140203800766807e-06, "loss": 0.8313, "step": 8645 }, { "epoch": 0.3053923148585583, "grad_norm": 1.5605517625808716, "learning_rate": 8.139758653317454e-06, "loss": 0.8717, "step": 8646 }, { "epoch": 0.30542763666226613, "grad_norm": 1.600091576576233, "learning_rate": 8.139313464775529e-06, "loss": 0.844, "step": 8647 }, { "epoch": 0.30546295846597404, "grad_norm": 1.706821084022522, "learning_rate": 8.13886823514686e-06, "loss": 0.8153, "step": 8648 }, { "epoch": 0.30549828026968195, "grad_norm": 1.7644457817077637, "learning_rate": 8.138422964437274e-06, "loss": 0.8408, "step": 8649 }, { "epoch": 0.30553360207338986, "grad_norm": 1.74064302444458, "learning_rate": 8.137977652652595e-06, "loss": 0.8342, "step": 8650 }, { "epoch": 0.3055689238770978, "grad_norm": 1.9346768856048584, "learning_rate": 8.137532299798659e-06, "loss": 0.8254, "step": 8651 }, { "epoch": 0.3056042456808057, "grad_norm": 1.72988760471344, "learning_rate": 8.137086905881287e-06, "loss": 0.815, "step": 8652 }, { "epoch": 0.3056395674845136, "grad_norm": 1.4974557161331177, "learning_rate": 8.136641470906312e-06, "loss": 0.8022, "step": 8653 }, { "epoch": 0.3056748892882215, "grad_norm": 1.6108317375183105, "learning_rate": 8.136195994879563e-06, "loss": 0.7919, "step": 8654 }, { "epoch": 0.3057102110919294, "grad_norm": 1.1445564031600952, "learning_rate": 8.135750477806872e-06, "loss": 0.6276, "step": 8655 }, { "epoch": 0.30574553289563733, "grad_norm": 2.1211163997650146, "learning_rate": 8.135304919694067e-06, "loss": 0.8691, "step": 8656 }, { "epoch": 0.30578085469934524, "grad_norm": 1.8343474864959717, "learning_rate": 8.13485932054698e-06, "loss": 0.8189, "step": 8657 }, { "epoch": 0.30581617650305315, "grad_norm": 1.731246829032898, "learning_rate": 8.134413680371444e-06, "loss": 0.8468, "step": 8658 }, { "epoch": 0.30585149830676106, "grad_norm": 1.5739729404449463, "learning_rate": 8.133967999173292e-06, "loss": 0.8028, "step": 8659 }, { "epoch": 0.3058868201104689, "grad_norm": 1.6491512060165405, "learning_rate": 8.133522276958355e-06, "loss": 0.8609, "step": 8660 }, { "epoch": 0.3059221419141768, "grad_norm": 1.8524091243743896, "learning_rate": 8.133076513732467e-06, "loss": 0.8473, "step": 8661 }, { "epoch": 0.30595746371788474, "grad_norm": 1.6200875043869019, "learning_rate": 8.13263070950146e-06, "loss": 0.8084, "step": 8662 }, { "epoch": 0.30599278552159265, "grad_norm": 1.7944713830947876, "learning_rate": 8.132184864271177e-06, "loss": 0.8641, "step": 8663 }, { "epoch": 0.30602810732530056, "grad_norm": 1.7613474130630493, "learning_rate": 8.131738978047442e-06, "loss": 0.8288, "step": 8664 }, { "epoch": 0.30606342912900847, "grad_norm": 1.866540551185608, "learning_rate": 8.1312930508361e-06, "loss": 0.8131, "step": 8665 }, { "epoch": 0.3060987509327164, "grad_norm": 1.6832832098007202, "learning_rate": 8.13084708264298e-06, "loss": 0.8158, "step": 8666 }, { "epoch": 0.3061340727364243, "grad_norm": 1.5935466289520264, "learning_rate": 8.130401073473925e-06, "loss": 0.8221, "step": 8667 }, { "epoch": 0.3061693945401322, "grad_norm": 1.6746598482131958, "learning_rate": 8.129955023334769e-06, "loss": 0.8494, "step": 8668 }, { "epoch": 0.3062047163438401, "grad_norm": 1.722732663154602, "learning_rate": 8.129508932231347e-06, "loss": 0.8331, "step": 8669 }, { "epoch": 0.306240038147548, "grad_norm": 1.6479172706604004, "learning_rate": 8.129062800169502e-06, "loss": 0.8271, "step": 8670 }, { "epoch": 0.30627535995125593, "grad_norm": 1.5343456268310547, "learning_rate": 8.128616627155072e-06, "loss": 0.8373, "step": 8671 }, { "epoch": 0.30631068175496384, "grad_norm": 1.6557261943817139, "learning_rate": 8.128170413193894e-06, "loss": 0.8497, "step": 8672 }, { "epoch": 0.3063460035586717, "grad_norm": 1.610164761543274, "learning_rate": 8.12772415829181e-06, "loss": 0.8121, "step": 8673 }, { "epoch": 0.3063813253623796, "grad_norm": 1.6468734741210938, "learning_rate": 8.12727786245466e-06, "loss": 0.8219, "step": 8674 }, { "epoch": 0.3064166471660875, "grad_norm": 1.553666353225708, "learning_rate": 8.126831525688288e-06, "loss": 0.8489, "step": 8675 }, { "epoch": 0.30645196896979543, "grad_norm": 1.8744679689407349, "learning_rate": 8.126385147998531e-06, "loss": 0.8674, "step": 8676 }, { "epoch": 0.30648729077350334, "grad_norm": 1.5965920686721802, "learning_rate": 8.125938729391232e-06, "loss": 0.8181, "step": 8677 }, { "epoch": 0.30652261257721125, "grad_norm": 1.9198943376541138, "learning_rate": 8.125492269872236e-06, "loss": 0.8308, "step": 8678 }, { "epoch": 0.30655793438091916, "grad_norm": 1.7746667861938477, "learning_rate": 8.125045769447383e-06, "loss": 0.8508, "step": 8679 }, { "epoch": 0.30659325618462707, "grad_norm": 1.8607571125030518, "learning_rate": 8.12459922812252e-06, "loss": 0.8497, "step": 8680 }, { "epoch": 0.306628577988335, "grad_norm": 1.6660689115524292, "learning_rate": 8.124152645903488e-06, "loss": 0.7635, "step": 8681 }, { "epoch": 0.3066638997920429, "grad_norm": 1.768070936203003, "learning_rate": 8.123706022796135e-06, "loss": 0.842, "step": 8682 }, { "epoch": 0.3066992215957508, "grad_norm": 1.595766544342041, "learning_rate": 8.123259358806304e-06, "loss": 0.8203, "step": 8683 }, { "epoch": 0.3067345433994587, "grad_norm": 1.709174633026123, "learning_rate": 8.122812653939843e-06, "loss": 0.8009, "step": 8684 }, { "epoch": 0.3067698652031666, "grad_norm": 1.564623236656189, "learning_rate": 8.122365908202595e-06, "loss": 0.8024, "step": 8685 }, { "epoch": 0.3068051870068745, "grad_norm": 1.6957341432571411, "learning_rate": 8.12191912160041e-06, "loss": 0.8007, "step": 8686 }, { "epoch": 0.3068405088105824, "grad_norm": 1.7615983486175537, "learning_rate": 8.121472294139133e-06, "loss": 0.8303, "step": 8687 }, { "epoch": 0.3068758306142903, "grad_norm": 1.7624238729476929, "learning_rate": 8.121025425824615e-06, "loss": 0.8769, "step": 8688 }, { "epoch": 0.3069111524179982, "grad_norm": 1.7580208778381348, "learning_rate": 8.120578516662703e-06, "loss": 0.8125, "step": 8689 }, { "epoch": 0.3069464742217061, "grad_norm": 1.6729785203933716, "learning_rate": 8.120131566659243e-06, "loss": 0.8576, "step": 8690 }, { "epoch": 0.30698179602541403, "grad_norm": 1.704213261604309, "learning_rate": 8.119684575820091e-06, "loss": 0.8366, "step": 8691 }, { "epoch": 0.30701711782912194, "grad_norm": 1.4780579805374146, "learning_rate": 8.119237544151094e-06, "loss": 0.7829, "step": 8692 }, { "epoch": 0.30705243963282985, "grad_norm": 1.4977072477340698, "learning_rate": 8.1187904716581e-06, "loss": 0.8175, "step": 8693 }, { "epoch": 0.30708776143653777, "grad_norm": 1.831347942352295, "learning_rate": 8.118343358346962e-06, "loss": 0.8059, "step": 8694 }, { "epoch": 0.3071230832402457, "grad_norm": 1.7435247898101807, "learning_rate": 8.117896204223536e-06, "loss": 0.8063, "step": 8695 }, { "epoch": 0.3071584050439536, "grad_norm": 1.6588973999023438, "learning_rate": 8.117449009293668e-06, "loss": 0.8488, "step": 8696 }, { "epoch": 0.3071937268476615, "grad_norm": 1.6333385705947876, "learning_rate": 8.117001773563215e-06, "loss": 0.8456, "step": 8697 }, { "epoch": 0.3072290486513694, "grad_norm": 1.9909669160842896, "learning_rate": 8.116554497038027e-06, "loss": 0.83, "step": 8698 }, { "epoch": 0.30726437045507726, "grad_norm": 1.7223609685897827, "learning_rate": 8.11610717972396e-06, "loss": 0.8287, "step": 8699 }, { "epoch": 0.3072996922587852, "grad_norm": 1.7619597911834717, "learning_rate": 8.115659821626869e-06, "loss": 0.8276, "step": 8700 }, { "epoch": 0.3073350140624931, "grad_norm": 1.7361394166946411, "learning_rate": 8.115212422752607e-06, "loss": 0.8287, "step": 8701 }, { "epoch": 0.307370335866201, "grad_norm": 1.7418180704116821, "learning_rate": 8.114764983107029e-06, "loss": 0.8073, "step": 8702 }, { "epoch": 0.3074056576699089, "grad_norm": 1.5127414464950562, "learning_rate": 8.114317502695995e-06, "loss": 0.8332, "step": 8703 }, { "epoch": 0.3074409794736168, "grad_norm": 1.6366310119628906, "learning_rate": 8.113869981525358e-06, "loss": 0.8354, "step": 8704 }, { "epoch": 0.3074763012773247, "grad_norm": 1.7612366676330566, "learning_rate": 8.113422419600975e-06, "loss": 0.8211, "step": 8705 }, { "epoch": 0.30751162308103264, "grad_norm": 1.6657689809799194, "learning_rate": 8.112974816928707e-06, "loss": 0.8455, "step": 8706 }, { "epoch": 0.30754694488474055, "grad_norm": 1.6281256675720215, "learning_rate": 8.112527173514409e-06, "loss": 0.8421, "step": 8707 }, { "epoch": 0.30758226668844846, "grad_norm": 1.6917235851287842, "learning_rate": 8.112079489363938e-06, "loss": 0.831, "step": 8708 }, { "epoch": 0.30761758849215637, "grad_norm": 1.7611099481582642, "learning_rate": 8.11163176448316e-06, "loss": 0.8103, "step": 8709 }, { "epoch": 0.3076529102958643, "grad_norm": 1.472608208656311, "learning_rate": 8.111183998877924e-06, "loss": 0.794, "step": 8710 }, { "epoch": 0.3076882320995722, "grad_norm": 1.8705828189849854, "learning_rate": 8.110736192554103e-06, "loss": 0.8626, "step": 8711 }, { "epoch": 0.30772355390328004, "grad_norm": 1.679585337638855, "learning_rate": 8.110288345517548e-06, "loss": 0.8245, "step": 8712 }, { "epoch": 0.30775887570698796, "grad_norm": 1.8307068347930908, "learning_rate": 8.109840457774126e-06, "loss": 0.8832, "step": 8713 }, { "epoch": 0.30779419751069587, "grad_norm": 1.6568782329559326, "learning_rate": 8.109392529329695e-06, "loss": 0.8388, "step": 8714 }, { "epoch": 0.3078295193144038, "grad_norm": 1.6089926958084106, "learning_rate": 8.10894456019012e-06, "loss": 0.8349, "step": 8715 }, { "epoch": 0.3078648411181117, "grad_norm": 1.7563586235046387, "learning_rate": 8.108496550361263e-06, "loss": 0.8675, "step": 8716 }, { "epoch": 0.3079001629218196, "grad_norm": 1.8162333965301514, "learning_rate": 8.108048499848986e-06, "loss": 0.8419, "step": 8717 }, { "epoch": 0.3079354847255275, "grad_norm": 1.561714768409729, "learning_rate": 8.107600408659155e-06, "loss": 0.8206, "step": 8718 }, { "epoch": 0.3079708065292354, "grad_norm": 1.6809369325637817, "learning_rate": 8.107152276797634e-06, "loss": 0.8239, "step": 8719 }, { "epoch": 0.30800612833294333, "grad_norm": 1.7720221281051636, "learning_rate": 8.106704104270289e-06, "loss": 0.868, "step": 8720 }, { "epoch": 0.30804145013665124, "grad_norm": 1.5414597988128662, "learning_rate": 8.106255891082983e-06, "loss": 0.8309, "step": 8721 }, { "epoch": 0.30807677194035915, "grad_norm": 1.6896893978118896, "learning_rate": 8.105807637241587e-06, "loss": 0.8344, "step": 8722 }, { "epoch": 0.30811209374406706, "grad_norm": 1.5927338600158691, "learning_rate": 8.105359342751962e-06, "loss": 0.8228, "step": 8723 }, { "epoch": 0.30814741554777497, "grad_norm": 1.6010719537734985, "learning_rate": 8.104911007619977e-06, "loss": 0.7984, "step": 8724 }, { "epoch": 0.3081827373514828, "grad_norm": 1.6143441200256348, "learning_rate": 8.104462631851502e-06, "loss": 0.7995, "step": 8725 }, { "epoch": 0.30821805915519074, "grad_norm": 2.331021308898926, "learning_rate": 8.104014215452406e-06, "loss": 0.8321, "step": 8726 }, { "epoch": 0.30825338095889865, "grad_norm": 1.589759349822998, "learning_rate": 8.103565758428552e-06, "loss": 0.8146, "step": 8727 }, { "epoch": 0.30828870276260656, "grad_norm": 1.8768564462661743, "learning_rate": 8.103117260785812e-06, "loss": 0.8277, "step": 8728 }, { "epoch": 0.30832402456631447, "grad_norm": 1.7299988269805908, "learning_rate": 8.102668722530058e-06, "loss": 0.8281, "step": 8729 }, { "epoch": 0.3083593463700224, "grad_norm": 1.731627345085144, "learning_rate": 8.102220143667159e-06, "loss": 0.8337, "step": 8730 }, { "epoch": 0.3083946681737303, "grad_norm": 1.6760785579681396, "learning_rate": 8.101771524202987e-06, "loss": 0.8121, "step": 8731 }, { "epoch": 0.3084299899774382, "grad_norm": 1.5214565992355347, "learning_rate": 8.101322864143411e-06, "loss": 0.7876, "step": 8732 }, { "epoch": 0.3084653117811461, "grad_norm": 1.6070829629898071, "learning_rate": 8.100874163494304e-06, "loss": 0.8152, "step": 8733 }, { "epoch": 0.308500633584854, "grad_norm": 1.6201605796813965, "learning_rate": 8.10042542226154e-06, "loss": 0.8508, "step": 8734 }, { "epoch": 0.30853595538856193, "grad_norm": 1.5270118713378906, "learning_rate": 8.099976640450991e-06, "loss": 0.8532, "step": 8735 }, { "epoch": 0.30857127719226984, "grad_norm": 1.7791250944137573, "learning_rate": 8.099527818068528e-06, "loss": 0.7941, "step": 8736 }, { "epoch": 0.30860659899597775, "grad_norm": 1.8154274225234985, "learning_rate": 8.09907895512003e-06, "loss": 0.8567, "step": 8737 }, { "epoch": 0.3086419207996856, "grad_norm": 1.7347545623779297, "learning_rate": 8.098630051611369e-06, "loss": 0.8295, "step": 8738 }, { "epoch": 0.3086772426033935, "grad_norm": 1.5785092115402222, "learning_rate": 8.09818110754842e-06, "loss": 0.8186, "step": 8739 }, { "epoch": 0.30871256440710143, "grad_norm": 1.6518551111221313, "learning_rate": 8.097732122937059e-06, "loss": 0.7743, "step": 8740 }, { "epoch": 0.30874788621080934, "grad_norm": 1.7197842597961426, "learning_rate": 8.097283097783164e-06, "loss": 0.8283, "step": 8741 }, { "epoch": 0.30878320801451725, "grad_norm": 1.9384452104568481, "learning_rate": 8.096834032092608e-06, "loss": 0.8993, "step": 8742 }, { "epoch": 0.30881852981822516, "grad_norm": 1.6612435579299927, "learning_rate": 8.09638492587127e-06, "loss": 0.8794, "step": 8743 }, { "epoch": 0.3088538516219331, "grad_norm": 1.1467598676681519, "learning_rate": 8.095935779125029e-06, "loss": 0.5983, "step": 8744 }, { "epoch": 0.308889173425641, "grad_norm": 1.6539788246154785, "learning_rate": 8.095486591859763e-06, "loss": 0.8188, "step": 8745 }, { "epoch": 0.3089244952293489, "grad_norm": 1.823181390762329, "learning_rate": 8.095037364081348e-06, "loss": 0.8233, "step": 8746 }, { "epoch": 0.3089598170330568, "grad_norm": 1.8370999097824097, "learning_rate": 8.094588095795668e-06, "loss": 0.8489, "step": 8747 }, { "epoch": 0.3089951388367647, "grad_norm": 1.7930638790130615, "learning_rate": 8.094138787008601e-06, "loss": 0.8479, "step": 8748 }, { "epoch": 0.3090304606404726, "grad_norm": 1.7265238761901855, "learning_rate": 8.093689437726026e-06, "loss": 0.8004, "step": 8749 }, { "epoch": 0.30906578244418054, "grad_norm": 1.653009057044983, "learning_rate": 8.093240047953825e-06, "loss": 0.8143, "step": 8750 }, { "epoch": 0.3091011042478884, "grad_norm": 1.6757750511169434, "learning_rate": 8.092790617697881e-06, "loss": 0.854, "step": 8751 }, { "epoch": 0.3091364260515963, "grad_norm": 1.539276361465454, "learning_rate": 8.092341146964075e-06, "loss": 0.8235, "step": 8752 }, { "epoch": 0.3091717478553042, "grad_norm": 1.5680960416793823, "learning_rate": 8.091891635758288e-06, "loss": 0.8269, "step": 8753 }, { "epoch": 0.3092070696590121, "grad_norm": 1.6839569807052612, "learning_rate": 8.091442084086406e-06, "loss": 0.8559, "step": 8754 }, { "epoch": 0.30924239146272003, "grad_norm": 1.6100813150405884, "learning_rate": 8.090992491954309e-06, "loss": 0.8157, "step": 8755 }, { "epoch": 0.30927771326642794, "grad_norm": 1.4779776334762573, "learning_rate": 8.090542859367885e-06, "loss": 0.8277, "step": 8756 }, { "epoch": 0.30931303507013586, "grad_norm": 1.790789008140564, "learning_rate": 8.090093186333015e-06, "loss": 0.8524, "step": 8757 }, { "epoch": 0.30934835687384377, "grad_norm": 2.016981601715088, "learning_rate": 8.089643472855588e-06, "loss": 0.8388, "step": 8758 }, { "epoch": 0.3093836786775517, "grad_norm": 1.77885901927948, "learning_rate": 8.089193718941489e-06, "loss": 0.8349, "step": 8759 }, { "epoch": 0.3094190004812596, "grad_norm": 1.7404197454452515, "learning_rate": 8.088743924596602e-06, "loss": 0.8233, "step": 8760 }, { "epoch": 0.3094543222849675, "grad_norm": 1.7061599493026733, "learning_rate": 8.088294089826814e-06, "loss": 0.8688, "step": 8761 }, { "epoch": 0.3094896440886754, "grad_norm": 1.6343543529510498, "learning_rate": 8.087844214638014e-06, "loss": 0.8175, "step": 8762 }, { "epoch": 0.3095249658923833, "grad_norm": 1.8920207023620605, "learning_rate": 8.087394299036092e-06, "loss": 0.8182, "step": 8763 }, { "epoch": 0.3095602876960912, "grad_norm": 1.7128630876541138, "learning_rate": 8.08694434302693e-06, "loss": 0.8063, "step": 8764 }, { "epoch": 0.3095956094997991, "grad_norm": 3.1235337257385254, "learning_rate": 8.086494346616422e-06, "loss": 0.8362, "step": 8765 }, { "epoch": 0.309630931303507, "grad_norm": 1.7253730297088623, "learning_rate": 8.086044309810456e-06, "loss": 0.8426, "step": 8766 }, { "epoch": 0.3096662531072149, "grad_norm": 3.0456392765045166, "learning_rate": 8.085594232614923e-06, "loss": 0.8248, "step": 8767 }, { "epoch": 0.3097015749109228, "grad_norm": 1.715004563331604, "learning_rate": 8.085144115035714e-06, "loss": 0.8147, "step": 8768 }, { "epoch": 0.3097368967146307, "grad_norm": 1.7160186767578125, "learning_rate": 8.084693957078717e-06, "loss": 0.8378, "step": 8769 }, { "epoch": 0.30977221851833864, "grad_norm": 1.7038747072219849, "learning_rate": 8.084243758749826e-06, "loss": 0.8491, "step": 8770 }, { "epoch": 0.30980754032204655, "grad_norm": 1.153052568435669, "learning_rate": 8.083793520054932e-06, "loss": 0.5933, "step": 8771 }, { "epoch": 0.30984286212575446, "grad_norm": 1.6932591199874878, "learning_rate": 8.083343240999929e-06, "loss": 0.8376, "step": 8772 }, { "epoch": 0.30987818392946237, "grad_norm": 1.6173982620239258, "learning_rate": 8.082892921590708e-06, "loss": 0.8476, "step": 8773 }, { "epoch": 0.3099135057331703, "grad_norm": 1.7327405214309692, "learning_rate": 8.082442561833163e-06, "loss": 0.8325, "step": 8774 }, { "epoch": 0.3099488275368782, "grad_norm": 1.735573410987854, "learning_rate": 8.081992161733192e-06, "loss": 0.7975, "step": 8775 }, { "epoch": 0.3099841493405861, "grad_norm": 1.5514893531799316, "learning_rate": 8.081541721296686e-06, "loss": 0.8138, "step": 8776 }, { "epoch": 0.31001947114429396, "grad_norm": 1.5878528356552124, "learning_rate": 8.08109124052954e-06, "loss": 0.8734, "step": 8777 }, { "epoch": 0.31005479294800187, "grad_norm": 1.7105767726898193, "learning_rate": 8.080640719437654e-06, "loss": 0.8417, "step": 8778 }, { "epoch": 0.3100901147517098, "grad_norm": 1.7675672769546509, "learning_rate": 8.080190158026917e-06, "loss": 0.8199, "step": 8779 }, { "epoch": 0.3101254365554177, "grad_norm": 1.785860538482666, "learning_rate": 8.079739556303233e-06, "loss": 0.8259, "step": 8780 }, { "epoch": 0.3101607583591256, "grad_norm": 1.8462620973587036, "learning_rate": 8.079288914272496e-06, "loss": 0.8225, "step": 8781 }, { "epoch": 0.3101960801628335, "grad_norm": 1.8020405769348145, "learning_rate": 8.078838231940605e-06, "loss": 0.8097, "step": 8782 }, { "epoch": 0.3102314019665414, "grad_norm": 1.7200566530227661, "learning_rate": 8.078387509313457e-06, "loss": 0.8142, "step": 8783 }, { "epoch": 0.31026672377024933, "grad_norm": 1.5435409545898438, "learning_rate": 8.077936746396952e-06, "loss": 0.7947, "step": 8784 }, { "epoch": 0.31030204557395724, "grad_norm": 1.9003798961639404, "learning_rate": 8.07748594319699e-06, "loss": 0.8286, "step": 8785 }, { "epoch": 0.31033736737766515, "grad_norm": 1.583208441734314, "learning_rate": 8.077035099719471e-06, "loss": 0.8218, "step": 8786 }, { "epoch": 0.31037268918137306, "grad_norm": 1.610544204711914, "learning_rate": 8.076584215970296e-06, "loss": 0.823, "step": 8787 }, { "epoch": 0.310408010985081, "grad_norm": 1.67459237575531, "learning_rate": 8.076133291955363e-06, "loss": 0.8389, "step": 8788 }, { "epoch": 0.3104433327887889, "grad_norm": 2.1229445934295654, "learning_rate": 8.075682327680576e-06, "loss": 0.8367, "step": 8789 }, { "epoch": 0.31047865459249674, "grad_norm": 1.6518663167953491, "learning_rate": 8.075231323151838e-06, "loss": 0.7924, "step": 8790 }, { "epoch": 0.31051397639620465, "grad_norm": 1.4930682182312012, "learning_rate": 8.074780278375048e-06, "loss": 0.8635, "step": 8791 }, { "epoch": 0.31054929819991256, "grad_norm": 1.6074851751327515, "learning_rate": 8.074329193356115e-06, "loss": 0.8405, "step": 8792 }, { "epoch": 0.31058462000362047, "grad_norm": 1.868173599243164, "learning_rate": 8.073878068100936e-06, "loss": 0.8441, "step": 8793 }, { "epoch": 0.3106199418073284, "grad_norm": 1.729265570640564, "learning_rate": 8.073426902615421e-06, "loss": 0.816, "step": 8794 }, { "epoch": 0.3106552636110363, "grad_norm": 1.6266955137252808, "learning_rate": 8.072975696905472e-06, "loss": 0.817, "step": 8795 }, { "epoch": 0.3106905854147442, "grad_norm": 1.7584892511367798, "learning_rate": 8.072524450976996e-06, "loss": 0.8624, "step": 8796 }, { "epoch": 0.3107259072184521, "grad_norm": 1.8247684240341187, "learning_rate": 8.072073164835896e-06, "loss": 0.826, "step": 8797 }, { "epoch": 0.31076122902216, "grad_norm": 1.647975206375122, "learning_rate": 8.071621838488079e-06, "loss": 0.8415, "step": 8798 }, { "epoch": 0.31079655082586793, "grad_norm": 1.6795730590820312, "learning_rate": 8.071170471939454e-06, "loss": 0.8137, "step": 8799 }, { "epoch": 0.31083187262957584, "grad_norm": 1.6176997423171997, "learning_rate": 8.070719065195927e-06, "loss": 0.793, "step": 8800 }, { "epoch": 0.31086719443328376, "grad_norm": 1.939955234527588, "learning_rate": 8.070267618263407e-06, "loss": 0.8384, "step": 8801 }, { "epoch": 0.31090251623699167, "grad_norm": 2.0042970180511475, "learning_rate": 8.069816131147801e-06, "loss": 0.7922, "step": 8802 }, { "epoch": 0.3109378380406995, "grad_norm": 1.8994861841201782, "learning_rate": 8.069364603855019e-06, "loss": 0.798, "step": 8803 }, { "epoch": 0.31097315984440743, "grad_norm": 1.716599702835083, "learning_rate": 8.068913036390969e-06, "loss": 0.8346, "step": 8804 }, { "epoch": 0.31100848164811534, "grad_norm": 1.6613205671310425, "learning_rate": 8.068461428761563e-06, "loss": 0.7948, "step": 8805 }, { "epoch": 0.31104380345182325, "grad_norm": 1.7109922170639038, "learning_rate": 8.06800978097271e-06, "loss": 0.8391, "step": 8806 }, { "epoch": 0.31107912525553116, "grad_norm": 1.7091699838638306, "learning_rate": 8.06755809303032e-06, "loss": 0.7984, "step": 8807 }, { "epoch": 0.3111144470592391, "grad_norm": 1.9545183181762695, "learning_rate": 8.067106364940309e-06, "loss": 0.8857, "step": 8808 }, { "epoch": 0.311149768862947, "grad_norm": 3.5905988216400146, "learning_rate": 8.066654596708585e-06, "loss": 0.8079, "step": 8809 }, { "epoch": 0.3111850906666549, "grad_norm": 1.631200909614563, "learning_rate": 8.066202788341062e-06, "loss": 0.8251, "step": 8810 }, { "epoch": 0.3112204124703628, "grad_norm": 1.564147710800171, "learning_rate": 8.065750939843655e-06, "loss": 0.8035, "step": 8811 }, { "epoch": 0.3112557342740707, "grad_norm": 1.5482381582260132, "learning_rate": 8.065299051222274e-06, "loss": 0.817, "step": 8812 }, { "epoch": 0.3112910560777786, "grad_norm": 1.7739356756210327, "learning_rate": 8.064847122482837e-06, "loss": 0.8221, "step": 8813 }, { "epoch": 0.31132637788148654, "grad_norm": 1.8009361028671265, "learning_rate": 8.064395153631255e-06, "loss": 0.8244, "step": 8814 }, { "epoch": 0.31136169968519445, "grad_norm": 1.8931035995483398, "learning_rate": 8.063943144673448e-06, "loss": 0.8054, "step": 8815 }, { "epoch": 0.3113970214889023, "grad_norm": 1.6992268562316895, "learning_rate": 8.063491095615327e-06, "loss": 0.859, "step": 8816 }, { "epoch": 0.3114323432926102, "grad_norm": 1.5556268692016602, "learning_rate": 8.063039006462811e-06, "loss": 0.794, "step": 8817 }, { "epoch": 0.3114676650963181, "grad_norm": 1.6312010288238525, "learning_rate": 8.062586877221816e-06, "loss": 0.8347, "step": 8818 }, { "epoch": 0.31150298690002604, "grad_norm": 1.7145109176635742, "learning_rate": 8.06213470789826e-06, "loss": 0.8581, "step": 8819 }, { "epoch": 0.31153830870373395, "grad_norm": 1.767076849937439, "learning_rate": 8.06168249849806e-06, "loss": 0.8301, "step": 8820 }, { "epoch": 0.31157363050744186, "grad_norm": 1.683561086654663, "learning_rate": 8.061230249027135e-06, "loss": 0.8136, "step": 8821 }, { "epoch": 0.31160895231114977, "grad_norm": 1.622236967086792, "learning_rate": 8.060777959491405e-06, "loss": 0.8232, "step": 8822 }, { "epoch": 0.3116442741148577, "grad_norm": 1.6504145860671997, "learning_rate": 8.060325629896788e-06, "loss": 0.8096, "step": 8823 }, { "epoch": 0.3116795959185656, "grad_norm": 1.5541952848434448, "learning_rate": 8.059873260249204e-06, "loss": 0.7936, "step": 8824 }, { "epoch": 0.3117149177222735, "grad_norm": 1.8382360935211182, "learning_rate": 8.059420850554574e-06, "loss": 0.8733, "step": 8825 }, { "epoch": 0.3117502395259814, "grad_norm": 1.744665503501892, "learning_rate": 8.058968400818819e-06, "loss": 0.8353, "step": 8826 }, { "epoch": 0.3117855613296893, "grad_norm": 2.5885584354400635, "learning_rate": 8.058515911047862e-06, "loss": 0.8104, "step": 8827 }, { "epoch": 0.31182088313339723, "grad_norm": 1.597565770149231, "learning_rate": 8.058063381247622e-06, "loss": 0.8299, "step": 8828 }, { "epoch": 0.3118562049371051, "grad_norm": 1.699031114578247, "learning_rate": 8.057610811424024e-06, "loss": 0.8214, "step": 8829 }, { "epoch": 0.311891526740813, "grad_norm": 2.424938917160034, "learning_rate": 8.057158201582989e-06, "loss": 0.812, "step": 8830 }, { "epoch": 0.3119268485445209, "grad_norm": 1.8488082885742188, "learning_rate": 8.056705551730444e-06, "loss": 0.8153, "step": 8831 }, { "epoch": 0.3119621703482288, "grad_norm": 1.8438631296157837, "learning_rate": 8.05625286187231e-06, "loss": 0.8452, "step": 8832 }, { "epoch": 0.31199749215193673, "grad_norm": 1.5925875902175903, "learning_rate": 8.055800132014515e-06, "loss": 0.8493, "step": 8833 }, { "epoch": 0.31203281395564464, "grad_norm": 1.7748687267303467, "learning_rate": 8.055347362162981e-06, "loss": 0.8565, "step": 8834 }, { "epoch": 0.31206813575935255, "grad_norm": 1.5692633390426636, "learning_rate": 8.054894552323636e-06, "loss": 0.8441, "step": 8835 }, { "epoch": 0.31210345756306046, "grad_norm": 1.7903192043304443, "learning_rate": 8.054441702502403e-06, "loss": 0.8241, "step": 8836 }, { "epoch": 0.31213877936676837, "grad_norm": 1.8603838682174683, "learning_rate": 8.053988812705214e-06, "loss": 0.8468, "step": 8837 }, { "epoch": 0.3121741011704763, "grad_norm": 1.6377183198928833, "learning_rate": 8.05353588293799e-06, "loss": 0.8038, "step": 8838 }, { "epoch": 0.3122094229741842, "grad_norm": 1.69636070728302, "learning_rate": 8.053082913206666e-06, "loss": 0.8138, "step": 8839 }, { "epoch": 0.3122447447778921, "grad_norm": 1.6191285848617554, "learning_rate": 8.052629903517164e-06, "loss": 0.8414, "step": 8840 }, { "epoch": 0.3122800665816, "grad_norm": 1.6373058557510376, "learning_rate": 8.052176853875418e-06, "loss": 0.8248, "step": 8841 }, { "epoch": 0.31231538838530787, "grad_norm": 2.045297384262085, "learning_rate": 8.051723764287353e-06, "loss": 0.8474, "step": 8842 }, { "epoch": 0.3123507101890158, "grad_norm": 1.9354913234710693, "learning_rate": 8.051270634758903e-06, "loss": 0.8239, "step": 8843 }, { "epoch": 0.3123860319927237, "grad_norm": 1.6483741998672485, "learning_rate": 8.050817465295995e-06, "loss": 0.825, "step": 8844 }, { "epoch": 0.3124213537964316, "grad_norm": 1.7435719966888428, "learning_rate": 8.050364255904564e-06, "loss": 0.8838, "step": 8845 }, { "epoch": 0.3124566756001395, "grad_norm": 1.5878137350082397, "learning_rate": 8.049911006590535e-06, "loss": 0.806, "step": 8846 }, { "epoch": 0.3124919974038474, "grad_norm": 1.755567193031311, "learning_rate": 8.049457717359847e-06, "loss": 0.843, "step": 8847 }, { "epoch": 0.31252731920755533, "grad_norm": 1.645143747329712, "learning_rate": 8.049004388218428e-06, "loss": 0.8076, "step": 8848 }, { "epoch": 0.31256264101126324, "grad_norm": 1.7451281547546387, "learning_rate": 8.048551019172215e-06, "loss": 0.8139, "step": 8849 }, { "epoch": 0.31259796281497115, "grad_norm": 1.6668922901153564, "learning_rate": 8.048097610227137e-06, "loss": 0.8159, "step": 8850 }, { "epoch": 0.31263328461867906, "grad_norm": 1.5923265218734741, "learning_rate": 8.047644161389132e-06, "loss": 0.8163, "step": 8851 }, { "epoch": 0.312668606422387, "grad_norm": 1.6632596254348755, "learning_rate": 8.047190672664133e-06, "loss": 0.8198, "step": 8852 }, { "epoch": 0.3127039282260949, "grad_norm": 1.613760232925415, "learning_rate": 8.046737144058075e-06, "loss": 0.8383, "step": 8853 }, { "epoch": 0.3127392500298028, "grad_norm": 1.791642427444458, "learning_rate": 8.046283575576892e-06, "loss": 0.8416, "step": 8854 }, { "epoch": 0.31277457183351065, "grad_norm": 1.6813786029815674, "learning_rate": 8.045829967226524e-06, "loss": 0.8624, "step": 8855 }, { "epoch": 0.31280989363721856, "grad_norm": 1.7314980030059814, "learning_rate": 8.045376319012907e-06, "loss": 0.8398, "step": 8856 }, { "epoch": 0.31284521544092647, "grad_norm": 1.5519614219665527, "learning_rate": 8.044922630941976e-06, "loss": 0.8231, "step": 8857 }, { "epoch": 0.3128805372446344, "grad_norm": 1.833093285560608, "learning_rate": 8.044468903019668e-06, "loss": 0.8312, "step": 8858 }, { "epoch": 0.3129158590483423, "grad_norm": 1.8293991088867188, "learning_rate": 8.044015135251926e-06, "loss": 0.8576, "step": 8859 }, { "epoch": 0.3129511808520502, "grad_norm": 1.6928133964538574, "learning_rate": 8.043561327644686e-06, "loss": 0.8456, "step": 8860 }, { "epoch": 0.3129865026557581, "grad_norm": 1.6554977893829346, "learning_rate": 8.043107480203886e-06, "loss": 0.7841, "step": 8861 }, { "epoch": 0.313021824459466, "grad_norm": 1.6435327529907227, "learning_rate": 8.04265359293547e-06, "loss": 0.8287, "step": 8862 }, { "epoch": 0.31305714626317394, "grad_norm": 1.5761033296585083, "learning_rate": 8.042199665845374e-06, "loss": 0.8275, "step": 8863 }, { "epoch": 0.31309246806688185, "grad_norm": 1.5056339502334595, "learning_rate": 8.04174569893954e-06, "loss": 0.7898, "step": 8864 }, { "epoch": 0.31312778987058976, "grad_norm": 1.8662530183792114, "learning_rate": 8.041291692223911e-06, "loss": 0.8567, "step": 8865 }, { "epoch": 0.31316311167429767, "grad_norm": 1.5041362047195435, "learning_rate": 8.040837645704428e-06, "loss": 0.8332, "step": 8866 }, { "epoch": 0.3131984334780056, "grad_norm": 1.707160234451294, "learning_rate": 8.040383559387033e-06, "loss": 0.8183, "step": 8867 }, { "epoch": 0.31323375528171343, "grad_norm": 1.6764721870422363, "learning_rate": 8.03992943327767e-06, "loss": 0.7986, "step": 8868 }, { "epoch": 0.31326907708542134, "grad_norm": 1.5989643335342407, "learning_rate": 8.03947526738228e-06, "loss": 0.7856, "step": 8869 }, { "epoch": 0.31330439888912925, "grad_norm": 1.727283000946045, "learning_rate": 8.039021061706812e-06, "loss": 0.7995, "step": 8870 }, { "epoch": 0.31333972069283716, "grad_norm": 1.5871076583862305, "learning_rate": 8.038566816257208e-06, "loss": 0.8267, "step": 8871 }, { "epoch": 0.3133750424965451, "grad_norm": 1.0866302251815796, "learning_rate": 8.03811253103941e-06, "loss": 0.561, "step": 8872 }, { "epoch": 0.313410364300253, "grad_norm": 1.4705276489257812, "learning_rate": 8.037658206059366e-06, "loss": 0.7757, "step": 8873 }, { "epoch": 0.3134456861039609, "grad_norm": 1.5750259160995483, "learning_rate": 8.037203841323025e-06, "loss": 0.824, "step": 8874 }, { "epoch": 0.3134810079076688, "grad_norm": 1.732802152633667, "learning_rate": 8.03674943683633e-06, "loss": 0.8104, "step": 8875 }, { "epoch": 0.3135163297113767, "grad_norm": 1.630225658416748, "learning_rate": 8.036294992605229e-06, "loss": 0.8339, "step": 8876 }, { "epoch": 0.31355165151508463, "grad_norm": 1.6016050577163696, "learning_rate": 8.035840508635671e-06, "loss": 0.8163, "step": 8877 }, { "epoch": 0.31358697331879254, "grad_norm": 1.624299168586731, "learning_rate": 8.0353859849336e-06, "loss": 0.8217, "step": 8878 }, { "epoch": 0.31362229512250045, "grad_norm": 1.6662447452545166, "learning_rate": 8.034931421504972e-06, "loss": 0.816, "step": 8879 }, { "epoch": 0.31365761692620836, "grad_norm": 1.5554884672164917, "learning_rate": 8.03447681835573e-06, "loss": 0.8105, "step": 8880 }, { "epoch": 0.31369293872991627, "grad_norm": 1.6832915544509888, "learning_rate": 8.034022175491825e-06, "loss": 0.8228, "step": 8881 }, { "epoch": 0.3137282605336241, "grad_norm": 1.5409729480743408, "learning_rate": 8.03356749291921e-06, "loss": 0.8149, "step": 8882 }, { "epoch": 0.31376358233733204, "grad_norm": 1.6642467975616455, "learning_rate": 8.033112770643832e-06, "loss": 0.8451, "step": 8883 }, { "epoch": 0.31379890414103995, "grad_norm": 1.6455491781234741, "learning_rate": 8.032658008671646e-06, "loss": 0.8501, "step": 8884 }, { "epoch": 0.31383422594474786, "grad_norm": 1.8582102060317993, "learning_rate": 8.0322032070086e-06, "loss": 0.8133, "step": 8885 }, { "epoch": 0.31386954774845577, "grad_norm": 1.7796741724014282, "learning_rate": 8.03174836566065e-06, "loss": 0.8704, "step": 8886 }, { "epoch": 0.3139048695521637, "grad_norm": 1.7340271472930908, "learning_rate": 8.031293484633746e-06, "loss": 0.8124, "step": 8887 }, { "epoch": 0.3139401913558716, "grad_norm": 1.5984853506088257, "learning_rate": 8.030838563933845e-06, "loss": 0.8073, "step": 8888 }, { "epoch": 0.3139755131595795, "grad_norm": 1.8018064498901367, "learning_rate": 8.030383603566896e-06, "loss": 0.8639, "step": 8889 }, { "epoch": 0.3140108349632874, "grad_norm": 1.60390305519104, "learning_rate": 8.029928603538857e-06, "loss": 0.821, "step": 8890 }, { "epoch": 0.3140461567669953, "grad_norm": 1.6853770017623901, "learning_rate": 8.029473563855684e-06, "loss": 0.8207, "step": 8891 }, { "epoch": 0.31408147857070323, "grad_norm": 1.6941684484481812, "learning_rate": 8.029018484523327e-06, "loss": 0.8236, "step": 8892 }, { "epoch": 0.31411680037441114, "grad_norm": 2.0387446880340576, "learning_rate": 8.028563365547748e-06, "loss": 0.8201, "step": 8893 }, { "epoch": 0.31415212217811905, "grad_norm": 1.497305989265442, "learning_rate": 8.0281082069349e-06, "loss": 0.7886, "step": 8894 }, { "epoch": 0.3141874439818269, "grad_norm": 1.67084538936615, "learning_rate": 8.02765300869074e-06, "loss": 0.8306, "step": 8895 }, { "epoch": 0.3142227657855348, "grad_norm": 1.7242027521133423, "learning_rate": 8.027197770821227e-06, "loss": 0.8329, "step": 8896 }, { "epoch": 0.31425808758924273, "grad_norm": 1.5836446285247803, "learning_rate": 8.02674249333232e-06, "loss": 0.8467, "step": 8897 }, { "epoch": 0.31429340939295064, "grad_norm": 1.6991924047470093, "learning_rate": 8.026287176229976e-06, "loss": 0.8314, "step": 8898 }, { "epoch": 0.31432873119665855, "grad_norm": 1.8175262212753296, "learning_rate": 8.025831819520153e-06, "loss": 0.8477, "step": 8899 }, { "epoch": 0.31436405300036646, "grad_norm": 1.6952826976776123, "learning_rate": 8.025376423208813e-06, "loss": 0.8589, "step": 8900 }, { "epoch": 0.31439937480407437, "grad_norm": 1.800585389137268, "learning_rate": 8.024920987301916e-06, "loss": 0.8605, "step": 8901 }, { "epoch": 0.3144346966077823, "grad_norm": 1.7396125793457031, "learning_rate": 8.02446551180542e-06, "loss": 0.791, "step": 8902 }, { "epoch": 0.3144700184114902, "grad_norm": 1.8429663181304932, "learning_rate": 8.024009996725288e-06, "loss": 0.8562, "step": 8903 }, { "epoch": 0.3145053402151981, "grad_norm": 1.6951574087142944, "learning_rate": 8.023554442067483e-06, "loss": 0.8466, "step": 8904 }, { "epoch": 0.314540662018906, "grad_norm": 1.5348938703536987, "learning_rate": 8.023098847837965e-06, "loss": 0.8347, "step": 8905 }, { "epoch": 0.3145759838226139, "grad_norm": 1.692650556564331, "learning_rate": 8.022643214042698e-06, "loss": 0.8497, "step": 8906 }, { "epoch": 0.31461130562632184, "grad_norm": 1.8603237867355347, "learning_rate": 8.022187540687644e-06, "loss": 0.8769, "step": 8907 }, { "epoch": 0.3146466274300297, "grad_norm": 1.7872389554977417, "learning_rate": 8.021731827778766e-06, "loss": 0.8382, "step": 8908 }, { "epoch": 0.3146819492337376, "grad_norm": 1.5874040126800537, "learning_rate": 8.021276075322034e-06, "loss": 0.8263, "step": 8909 }, { "epoch": 0.3147172710374455, "grad_norm": 1.6185681819915771, "learning_rate": 8.020820283323404e-06, "loss": 0.8243, "step": 8910 }, { "epoch": 0.3147525928411534, "grad_norm": 1.6495842933654785, "learning_rate": 8.02036445178885e-06, "loss": 0.8232, "step": 8911 }, { "epoch": 0.31478791464486133, "grad_norm": 1.692208170890808, "learning_rate": 8.019908580724331e-06, "loss": 0.813, "step": 8912 }, { "epoch": 0.31482323644856924, "grad_norm": 1.7124946117401123, "learning_rate": 8.019452670135819e-06, "loss": 0.8567, "step": 8913 }, { "epoch": 0.31485855825227715, "grad_norm": 1.9049043655395508, "learning_rate": 8.018996720029276e-06, "loss": 0.8258, "step": 8914 }, { "epoch": 0.31489388005598506, "grad_norm": 1.5211005210876465, "learning_rate": 8.018540730410672e-06, "loss": 0.8182, "step": 8915 }, { "epoch": 0.314929201859693, "grad_norm": 1.6022894382476807, "learning_rate": 8.018084701285975e-06, "loss": 0.8366, "step": 8916 }, { "epoch": 0.3149645236634009, "grad_norm": 1.6797192096710205, "learning_rate": 8.017628632661152e-06, "loss": 0.8453, "step": 8917 }, { "epoch": 0.3149998454671088, "grad_norm": 1.6136752367019653, "learning_rate": 8.017172524542171e-06, "loss": 0.8532, "step": 8918 }, { "epoch": 0.3150351672708167, "grad_norm": 1.8516327142715454, "learning_rate": 8.016716376935006e-06, "loss": 0.8166, "step": 8919 }, { "epoch": 0.3150704890745246, "grad_norm": 1.7846609354019165, "learning_rate": 8.016260189845622e-06, "loss": 0.8205, "step": 8920 }, { "epoch": 0.3151058108782325, "grad_norm": 1.679624319076538, "learning_rate": 8.015803963279993e-06, "loss": 0.8058, "step": 8921 }, { "epoch": 0.3151411326819404, "grad_norm": 1.694089651107788, "learning_rate": 8.01534769724409e-06, "loss": 0.8252, "step": 8922 }, { "epoch": 0.3151764544856483, "grad_norm": 1.6855982542037964, "learning_rate": 8.01489139174388e-06, "loss": 0.8327, "step": 8923 }, { "epoch": 0.3152117762893562, "grad_norm": 1.6103299856185913, "learning_rate": 8.01443504678534e-06, "loss": 0.8475, "step": 8924 }, { "epoch": 0.3152470980930641, "grad_norm": 1.7570083141326904, "learning_rate": 8.013978662374442e-06, "loss": 0.8299, "step": 8925 }, { "epoch": 0.315282419896772, "grad_norm": 1.6956427097320557, "learning_rate": 8.013522238517155e-06, "loss": 0.8351, "step": 8926 }, { "epoch": 0.31531774170047994, "grad_norm": 1.793088674545288, "learning_rate": 8.013065775219458e-06, "loss": 0.8068, "step": 8927 }, { "epoch": 0.31535306350418785, "grad_norm": 1.6110416650772095, "learning_rate": 8.012609272487319e-06, "loss": 0.8176, "step": 8928 }, { "epoch": 0.31538838530789576, "grad_norm": 1.5987695455551147, "learning_rate": 8.01215273032672e-06, "loss": 0.8578, "step": 8929 }, { "epoch": 0.31542370711160367, "grad_norm": 1.6550612449645996, "learning_rate": 8.01169614874363e-06, "loss": 0.8546, "step": 8930 }, { "epoch": 0.3154590289153116, "grad_norm": 1.65199875831604, "learning_rate": 8.011239527744027e-06, "loss": 0.8588, "step": 8931 }, { "epoch": 0.3154943507190195, "grad_norm": 1.556300163269043, "learning_rate": 8.010782867333888e-06, "loss": 0.8138, "step": 8932 }, { "epoch": 0.3155296725227274, "grad_norm": 1.684609055519104, "learning_rate": 8.010326167519188e-06, "loss": 0.8101, "step": 8933 }, { "epoch": 0.31556499432643526, "grad_norm": 1.5780566930770874, "learning_rate": 8.009869428305906e-06, "loss": 0.8314, "step": 8934 }, { "epoch": 0.31560031613014317, "grad_norm": 1.6992194652557373, "learning_rate": 8.00941264970002e-06, "loss": 0.872, "step": 8935 }, { "epoch": 0.3156356379338511, "grad_norm": 1.8585153818130493, "learning_rate": 8.008955831707503e-06, "loss": 0.8132, "step": 8936 }, { "epoch": 0.315670959737559, "grad_norm": 1.603073000907898, "learning_rate": 8.00849897433434e-06, "loss": 0.8286, "step": 8937 }, { "epoch": 0.3157062815412669, "grad_norm": 1.7113231420516968, "learning_rate": 8.008042077586507e-06, "loss": 0.8231, "step": 8938 }, { "epoch": 0.3157416033449748, "grad_norm": 2.7609612941741943, "learning_rate": 8.007585141469986e-06, "loss": 0.8253, "step": 8939 }, { "epoch": 0.3157769251486827, "grad_norm": 1.6472229957580566, "learning_rate": 8.007128165990756e-06, "loss": 0.841, "step": 8940 }, { "epoch": 0.31581224695239063, "grad_norm": 1.6445873975753784, "learning_rate": 8.006671151154796e-06, "loss": 0.8247, "step": 8941 }, { "epoch": 0.31584756875609854, "grad_norm": 1.6885043382644653, "learning_rate": 8.006214096968093e-06, "loss": 0.8257, "step": 8942 }, { "epoch": 0.31588289055980645, "grad_norm": 1.7015745639801025, "learning_rate": 8.00575700343662e-06, "loss": 0.8507, "step": 8943 }, { "epoch": 0.31591821236351436, "grad_norm": 1.8167004585266113, "learning_rate": 8.005299870566369e-06, "loss": 0.8345, "step": 8944 }, { "epoch": 0.31595353416722227, "grad_norm": 1.5643417835235596, "learning_rate": 8.004842698363315e-06, "loss": 0.7945, "step": 8945 }, { "epoch": 0.3159888559709302, "grad_norm": 1.6293352842330933, "learning_rate": 8.004385486833447e-06, "loss": 0.8098, "step": 8946 }, { "epoch": 0.31602417777463804, "grad_norm": 1.5651521682739258, "learning_rate": 8.003928235982745e-06, "loss": 0.8099, "step": 8947 }, { "epoch": 0.31605949957834595, "grad_norm": 1.4970715045928955, "learning_rate": 8.003470945817196e-06, "loss": 0.7999, "step": 8948 }, { "epoch": 0.31609482138205386, "grad_norm": 1.781219244003296, "learning_rate": 8.003013616342782e-06, "loss": 0.8878, "step": 8949 }, { "epoch": 0.31613014318576177, "grad_norm": 1.7606499195098877, "learning_rate": 8.002556247565492e-06, "loss": 0.8184, "step": 8950 }, { "epoch": 0.3161654649894697, "grad_norm": 1.738610863685608, "learning_rate": 8.002098839491308e-06, "loss": 0.8346, "step": 8951 }, { "epoch": 0.3162007867931776, "grad_norm": 1.5991657972335815, "learning_rate": 8.00164139212622e-06, "loss": 0.8481, "step": 8952 }, { "epoch": 0.3162361085968855, "grad_norm": 1.5954424142837524, "learning_rate": 8.001183905476216e-06, "loss": 0.8127, "step": 8953 }, { "epoch": 0.3162714304005934, "grad_norm": 1.86850905418396, "learning_rate": 8.000726379547279e-06, "loss": 0.8084, "step": 8954 }, { "epoch": 0.3163067522043013, "grad_norm": 1.0638134479522705, "learning_rate": 8.0002688143454e-06, "loss": 0.609, "step": 8955 }, { "epoch": 0.31634207400800923, "grad_norm": 1.629150629043579, "learning_rate": 7.999811209876566e-06, "loss": 0.8101, "step": 8956 }, { "epoch": 0.31637739581171714, "grad_norm": 1.7434853315353394, "learning_rate": 7.999353566146766e-06, "loss": 0.8383, "step": 8957 }, { "epoch": 0.31641271761542505, "grad_norm": 1.7433300018310547, "learning_rate": 7.998895883161993e-06, "loss": 0.8, "step": 8958 }, { "epoch": 0.31644803941913296, "grad_norm": 1.4573789834976196, "learning_rate": 7.998438160928232e-06, "loss": 0.8258, "step": 8959 }, { "epoch": 0.3164833612228408, "grad_norm": 1.5940146446228027, "learning_rate": 7.997980399451475e-06, "loss": 0.8126, "step": 8960 }, { "epoch": 0.31651868302654873, "grad_norm": 1.7803822755813599, "learning_rate": 7.997522598737717e-06, "loss": 0.7928, "step": 8961 }, { "epoch": 0.31655400483025664, "grad_norm": 1.7522517442703247, "learning_rate": 7.997064758792946e-06, "loss": 0.8255, "step": 8962 }, { "epoch": 0.31658932663396455, "grad_norm": 1.6055912971496582, "learning_rate": 7.996606879623155e-06, "loss": 0.8392, "step": 8963 }, { "epoch": 0.31662464843767246, "grad_norm": 1.8212069272994995, "learning_rate": 7.996148961234336e-06, "loss": 0.8087, "step": 8964 }, { "epoch": 0.3166599702413804, "grad_norm": 1.5740079879760742, "learning_rate": 7.995691003632481e-06, "loss": 0.8338, "step": 8965 }, { "epoch": 0.3166952920450883, "grad_norm": 1.9752888679504395, "learning_rate": 7.995233006823587e-06, "loss": 0.8261, "step": 8966 }, { "epoch": 0.3167306138487962, "grad_norm": 1.745046854019165, "learning_rate": 7.994774970813645e-06, "loss": 0.8363, "step": 8967 }, { "epoch": 0.3167659356525041, "grad_norm": 1.628227710723877, "learning_rate": 7.994316895608653e-06, "loss": 0.8182, "step": 8968 }, { "epoch": 0.316801257456212, "grad_norm": 1.6445726156234741, "learning_rate": 7.993858781214604e-06, "loss": 0.8285, "step": 8969 }, { "epoch": 0.3168365792599199, "grad_norm": 1.566965103149414, "learning_rate": 7.993400627637492e-06, "loss": 0.8384, "step": 8970 }, { "epoch": 0.31687190106362784, "grad_norm": 1.513245701789856, "learning_rate": 7.99294243488332e-06, "loss": 0.8328, "step": 8971 }, { "epoch": 0.31690722286733575, "grad_norm": 1.657441258430481, "learning_rate": 7.992484202958076e-06, "loss": 0.7967, "step": 8972 }, { "epoch": 0.3169425446710436, "grad_norm": 2.615398645401001, "learning_rate": 7.992025931867762e-06, "loss": 0.8088, "step": 8973 }, { "epoch": 0.3169778664747515, "grad_norm": 1.0158474445343018, "learning_rate": 7.991567621618376e-06, "loss": 0.6289, "step": 8974 }, { "epoch": 0.3170131882784594, "grad_norm": 1.7393277883529663, "learning_rate": 7.991109272215913e-06, "loss": 0.8656, "step": 8975 }, { "epoch": 0.31704851008216733, "grad_norm": 1.6057422161102295, "learning_rate": 7.990650883666378e-06, "loss": 0.8026, "step": 8976 }, { "epoch": 0.31708383188587524, "grad_norm": 1.8749215602874756, "learning_rate": 7.990192455975762e-06, "loss": 0.7931, "step": 8977 }, { "epoch": 0.31711915368958316, "grad_norm": 1.5625513792037964, "learning_rate": 7.989733989150073e-06, "loss": 0.8388, "step": 8978 }, { "epoch": 0.31715447549329107, "grad_norm": 1.743920087814331, "learning_rate": 7.989275483195305e-06, "loss": 0.8084, "step": 8979 }, { "epoch": 0.317189797296999, "grad_norm": 1.5582067966461182, "learning_rate": 7.988816938117462e-06, "loss": 0.829, "step": 8980 }, { "epoch": 0.3172251191007069, "grad_norm": 1.828942060470581, "learning_rate": 7.988358353922546e-06, "loss": 0.8393, "step": 8981 }, { "epoch": 0.3172604409044148, "grad_norm": 1.706723928451538, "learning_rate": 7.987899730616557e-06, "loss": 0.8376, "step": 8982 }, { "epoch": 0.3172957627081227, "grad_norm": 1.5008604526519775, "learning_rate": 7.987441068205496e-06, "loss": 0.8089, "step": 8983 }, { "epoch": 0.3173310845118306, "grad_norm": 1.7739999294281006, "learning_rate": 7.98698236669537e-06, "loss": 0.8196, "step": 8984 }, { "epoch": 0.31736640631553853, "grad_norm": 1.6614744663238525, "learning_rate": 7.98652362609218e-06, "loss": 0.7902, "step": 8985 }, { "epoch": 0.3174017281192464, "grad_norm": 1.674152135848999, "learning_rate": 7.986064846401931e-06, "loss": 0.847, "step": 8986 }, { "epoch": 0.3174370499229543, "grad_norm": 1.6539644002914429, "learning_rate": 7.985606027630626e-06, "loss": 0.8418, "step": 8987 }, { "epoch": 0.3174723717266622, "grad_norm": 1.603338360786438, "learning_rate": 7.985147169784269e-06, "loss": 0.8255, "step": 8988 }, { "epoch": 0.3175076935303701, "grad_norm": 1.8010294437408447, "learning_rate": 7.98468827286887e-06, "loss": 0.871, "step": 8989 }, { "epoch": 0.317543015334078, "grad_norm": 1.6409478187561035, "learning_rate": 7.98422933689043e-06, "loss": 0.8218, "step": 8990 }, { "epoch": 0.31757833713778594, "grad_norm": 1.6601754426956177, "learning_rate": 7.983770361854956e-06, "loss": 0.8499, "step": 8991 }, { "epoch": 0.31761365894149385, "grad_norm": 1.5299381017684937, "learning_rate": 7.983311347768459e-06, "loss": 0.8053, "step": 8992 }, { "epoch": 0.31764898074520176, "grad_norm": 1.7062503099441528, "learning_rate": 7.982852294636943e-06, "loss": 0.8265, "step": 8993 }, { "epoch": 0.31768430254890967, "grad_norm": 3.1213183403015137, "learning_rate": 7.982393202466417e-06, "loss": 0.8291, "step": 8994 }, { "epoch": 0.3177196243526176, "grad_norm": 1.8301441669464111, "learning_rate": 7.98193407126289e-06, "loss": 0.817, "step": 8995 }, { "epoch": 0.3177549461563255, "grad_norm": 1.7178069353103638, "learning_rate": 7.981474901032369e-06, "loss": 0.8211, "step": 8996 }, { "epoch": 0.3177902679600334, "grad_norm": 1.8064972162246704, "learning_rate": 7.981015691780864e-06, "loss": 0.8219, "step": 8997 }, { "epoch": 0.3178255897637413, "grad_norm": 1.6815459728240967, "learning_rate": 7.980556443514387e-06, "loss": 0.8223, "step": 8998 }, { "epoch": 0.31786091156744917, "grad_norm": 1.7544759511947632, "learning_rate": 7.98009715623895e-06, "loss": 0.8323, "step": 8999 }, { "epoch": 0.3178962333711571, "grad_norm": 1.7196502685546875, "learning_rate": 7.979637829960558e-06, "loss": 0.8261, "step": 9000 }, { "epoch": 0.317931555174865, "grad_norm": 1.7823385000228882, "learning_rate": 7.979178464685229e-06, "loss": 0.8418, "step": 9001 }, { "epoch": 0.3179668769785729, "grad_norm": 1.7751446962356567, "learning_rate": 7.978719060418971e-06, "loss": 0.8635, "step": 9002 }, { "epoch": 0.3180021987822808, "grad_norm": 1.6788135766983032, "learning_rate": 7.978259617167798e-06, "loss": 0.8481, "step": 9003 }, { "epoch": 0.3180375205859887, "grad_norm": 1.6453317403793335, "learning_rate": 7.977800134937722e-06, "loss": 0.7983, "step": 9004 }, { "epoch": 0.31807284238969663, "grad_norm": 2.49174165725708, "learning_rate": 7.97734061373476e-06, "loss": 0.8147, "step": 9005 }, { "epoch": 0.31810816419340454, "grad_norm": 1.57900071144104, "learning_rate": 7.976881053564921e-06, "loss": 0.8545, "step": 9006 }, { "epoch": 0.31814348599711245, "grad_norm": 1.8492196798324585, "learning_rate": 7.976421454434223e-06, "loss": 0.8344, "step": 9007 }, { "epoch": 0.31817880780082036, "grad_norm": 1.6865087747573853, "learning_rate": 7.975961816348684e-06, "loss": 0.8425, "step": 9008 }, { "epoch": 0.3182141296045283, "grad_norm": 1.6692215204238892, "learning_rate": 7.975502139314311e-06, "loss": 0.8411, "step": 9009 }, { "epoch": 0.3182494514082362, "grad_norm": 1.7236878871917725, "learning_rate": 7.975042423337129e-06, "loss": 0.851, "step": 9010 }, { "epoch": 0.3182847732119441, "grad_norm": 1.489552617073059, "learning_rate": 7.974582668423149e-06, "loss": 0.8263, "step": 9011 }, { "epoch": 0.31832009501565195, "grad_norm": 1.5342488288879395, "learning_rate": 7.97412287457839e-06, "loss": 0.8436, "step": 9012 }, { "epoch": 0.31835541681935986, "grad_norm": 1.6117939949035645, "learning_rate": 7.97366304180887e-06, "loss": 0.8037, "step": 9013 }, { "epoch": 0.31839073862306777, "grad_norm": 1.7991688251495361, "learning_rate": 7.97320317012061e-06, "loss": 0.805, "step": 9014 }, { "epoch": 0.3184260604267757, "grad_norm": 1.6608511209487915, "learning_rate": 7.972743259519623e-06, "loss": 0.8195, "step": 9015 }, { "epoch": 0.3184613822304836, "grad_norm": 1.9771119356155396, "learning_rate": 7.972283310011931e-06, "loss": 0.848, "step": 9016 }, { "epoch": 0.3184967040341915, "grad_norm": 1.8955384492874146, "learning_rate": 7.971823321603555e-06, "loss": 0.8247, "step": 9017 }, { "epoch": 0.3185320258378994, "grad_norm": 1.975837230682373, "learning_rate": 7.971363294300513e-06, "loss": 0.8032, "step": 9018 }, { "epoch": 0.3185673476416073, "grad_norm": 1.7102199792861938, "learning_rate": 7.970903228108829e-06, "loss": 0.83, "step": 9019 }, { "epoch": 0.31860266944531523, "grad_norm": 1.6351945400238037, "learning_rate": 7.970443123034519e-06, "loss": 0.7968, "step": 9020 }, { "epoch": 0.31863799124902314, "grad_norm": 5.107234001159668, "learning_rate": 7.969982979083609e-06, "loss": 0.8504, "step": 9021 }, { "epoch": 0.31867331305273106, "grad_norm": 1.8318066596984863, "learning_rate": 7.96952279626212e-06, "loss": 0.7988, "step": 9022 }, { "epoch": 0.31870863485643897, "grad_norm": 1.7698523998260498, "learning_rate": 7.969062574576076e-06, "loss": 0.8241, "step": 9023 }, { "epoch": 0.3187439566601469, "grad_norm": 2.0254361629486084, "learning_rate": 7.968602314031498e-06, "loss": 0.8622, "step": 9024 }, { "epoch": 0.31877927846385473, "grad_norm": 1.662683367729187, "learning_rate": 7.968142014634411e-06, "loss": 0.8325, "step": 9025 }, { "epoch": 0.31881460026756264, "grad_norm": 1.525856614112854, "learning_rate": 7.96768167639084e-06, "loss": 0.8439, "step": 9026 }, { "epoch": 0.31884992207127055, "grad_norm": 1.6400002241134644, "learning_rate": 7.967221299306808e-06, "loss": 0.8204, "step": 9027 }, { "epoch": 0.31888524387497846, "grad_norm": 1.7251118421554565, "learning_rate": 7.966760883388342e-06, "loss": 0.8329, "step": 9028 }, { "epoch": 0.3189205656786864, "grad_norm": 1.5945653915405273, "learning_rate": 7.966300428641467e-06, "loss": 0.819, "step": 9029 }, { "epoch": 0.3189558874823943, "grad_norm": 1.8672256469726562, "learning_rate": 7.965839935072208e-06, "loss": 0.8673, "step": 9030 }, { "epoch": 0.3189912092861022, "grad_norm": 1.5354965925216675, "learning_rate": 7.965379402686596e-06, "loss": 0.8122, "step": 9031 }, { "epoch": 0.3190265310898101, "grad_norm": 1.6050556898117065, "learning_rate": 7.964918831490655e-06, "loss": 0.8083, "step": 9032 }, { "epoch": 0.319061852893518, "grad_norm": 1.8861502408981323, "learning_rate": 7.964458221490413e-06, "loss": 0.854, "step": 9033 }, { "epoch": 0.3190971746972259, "grad_norm": 1.8182992935180664, "learning_rate": 7.9639975726919e-06, "loss": 0.8067, "step": 9034 }, { "epoch": 0.31913249650093384, "grad_norm": 1.722554326057434, "learning_rate": 7.963536885101143e-06, "loss": 0.8317, "step": 9035 }, { "epoch": 0.31916781830464175, "grad_norm": 1.6426736116409302, "learning_rate": 7.963076158724175e-06, "loss": 0.8078, "step": 9036 }, { "epoch": 0.31920314010834966, "grad_norm": 1.5653384923934937, "learning_rate": 7.96261539356702e-06, "loss": 0.8132, "step": 9037 }, { "epoch": 0.3192384619120575, "grad_norm": 1.8109363317489624, "learning_rate": 7.962154589635712e-06, "loss": 0.8564, "step": 9038 }, { "epoch": 0.3192737837157654, "grad_norm": 2.089677333831787, "learning_rate": 7.961693746936283e-06, "loss": 0.8611, "step": 9039 }, { "epoch": 0.31930910551947334, "grad_norm": 1.6329517364501953, "learning_rate": 7.961232865474762e-06, "loss": 0.8197, "step": 9040 }, { "epoch": 0.31934442732318125, "grad_norm": 1.7159738540649414, "learning_rate": 7.960771945257182e-06, "loss": 0.8269, "step": 9041 }, { "epoch": 0.31937974912688916, "grad_norm": 1.5243184566497803, "learning_rate": 7.960310986289575e-06, "loss": 0.8019, "step": 9042 }, { "epoch": 0.31941507093059707, "grad_norm": 1.6305673122406006, "learning_rate": 7.959849988577976e-06, "loss": 0.7738, "step": 9043 }, { "epoch": 0.319450392734305, "grad_norm": 1.6705400943756104, "learning_rate": 7.959388952128415e-06, "loss": 0.8047, "step": 9044 }, { "epoch": 0.3194857145380129, "grad_norm": 1.7414591312408447, "learning_rate": 7.958927876946929e-06, "loss": 0.8387, "step": 9045 }, { "epoch": 0.3195210363417208, "grad_norm": 1.7216486930847168, "learning_rate": 7.95846676303955e-06, "loss": 0.8739, "step": 9046 }, { "epoch": 0.3195563581454287, "grad_norm": 1.7937885522842407, "learning_rate": 7.958005610412314e-06, "loss": 0.8153, "step": 9047 }, { "epoch": 0.3195916799491366, "grad_norm": 1.8755404949188232, "learning_rate": 7.957544419071258e-06, "loss": 0.8055, "step": 9048 }, { "epoch": 0.31962700175284453, "grad_norm": 1.694589376449585, "learning_rate": 7.957083189022416e-06, "loss": 0.794, "step": 9049 }, { "epoch": 0.31966232355655244, "grad_norm": 1.7832976579666138, "learning_rate": 7.956621920271826e-06, "loss": 0.8288, "step": 9050 }, { "epoch": 0.3196976453602603, "grad_norm": 1.927038550376892, "learning_rate": 7.956160612825523e-06, "loss": 0.8048, "step": 9051 }, { "epoch": 0.3197329671639682, "grad_norm": 1.6391260623931885, "learning_rate": 7.955699266689545e-06, "loss": 0.8255, "step": 9052 }, { "epoch": 0.3197682889676761, "grad_norm": 2.8903377056121826, "learning_rate": 7.955237881869931e-06, "loss": 0.8075, "step": 9053 }, { "epoch": 0.31980361077138403, "grad_norm": 1.7082602977752686, "learning_rate": 7.954776458372718e-06, "loss": 0.8102, "step": 9054 }, { "epoch": 0.31983893257509194, "grad_norm": 1.8235077857971191, "learning_rate": 7.95431499620395e-06, "loss": 0.8329, "step": 9055 }, { "epoch": 0.31987425437879985, "grad_norm": 1.5304639339447021, "learning_rate": 7.953853495369658e-06, "loss": 0.8196, "step": 9056 }, { "epoch": 0.31990957618250776, "grad_norm": 1.5453635454177856, "learning_rate": 7.95339195587589e-06, "loss": 0.8125, "step": 9057 }, { "epoch": 0.31994489798621567, "grad_norm": 1.8407230377197266, "learning_rate": 7.952930377728681e-06, "loss": 0.8131, "step": 9058 }, { "epoch": 0.3199802197899236, "grad_norm": 2.3818721771240234, "learning_rate": 7.952468760934076e-06, "loss": 0.8068, "step": 9059 }, { "epoch": 0.3200155415936315, "grad_norm": 1.922843098640442, "learning_rate": 7.952007105498115e-06, "loss": 0.8098, "step": 9060 }, { "epoch": 0.3200508633973394, "grad_norm": 1.5034822225570679, "learning_rate": 7.951545411426838e-06, "loss": 0.7793, "step": 9061 }, { "epoch": 0.3200861852010473, "grad_norm": 1.5844444036483765, "learning_rate": 7.95108367872629e-06, "loss": 0.8344, "step": 9062 }, { "epoch": 0.3201215070047552, "grad_norm": 1.7289742231369019, "learning_rate": 7.950621907402516e-06, "loss": 0.8189, "step": 9063 }, { "epoch": 0.3201568288084631, "grad_norm": 2.043097734451294, "learning_rate": 7.950160097461555e-06, "loss": 0.8565, "step": 9064 }, { "epoch": 0.320192150612171, "grad_norm": 1.5322457551956177, "learning_rate": 7.949698248909455e-06, "loss": 0.8225, "step": 9065 }, { "epoch": 0.3202274724158789, "grad_norm": 1.6954091787338257, "learning_rate": 7.949236361752256e-06, "loss": 0.8566, "step": 9066 }, { "epoch": 0.3202627942195868, "grad_norm": 1.5853837728500366, "learning_rate": 7.948774435996008e-06, "loss": 0.7763, "step": 9067 }, { "epoch": 0.3202981160232947, "grad_norm": 1.6123032569885254, "learning_rate": 7.948312471646754e-06, "loss": 0.8159, "step": 9068 }, { "epoch": 0.32033343782700263, "grad_norm": 1.6353329420089722, "learning_rate": 7.947850468710539e-06, "loss": 0.7994, "step": 9069 }, { "epoch": 0.32036875963071054, "grad_norm": 1.5800939798355103, "learning_rate": 7.947388427193411e-06, "loss": 0.8272, "step": 9070 }, { "epoch": 0.32040408143441845, "grad_norm": 1.8385043144226074, "learning_rate": 7.94692634710142e-06, "loss": 0.8076, "step": 9071 }, { "epoch": 0.32043940323812636, "grad_norm": 1.7943857908248901, "learning_rate": 7.94646422844061e-06, "loss": 0.8547, "step": 9072 }, { "epoch": 0.3204747250418343, "grad_norm": 1.8784199953079224, "learning_rate": 7.946002071217031e-06, "loss": 0.8201, "step": 9073 }, { "epoch": 0.3205100468455422, "grad_norm": 1.7096892595291138, "learning_rate": 7.94553987543673e-06, "loss": 0.8451, "step": 9074 }, { "epoch": 0.3205453686492501, "grad_norm": 1.537743091583252, "learning_rate": 7.945077641105756e-06, "loss": 0.7883, "step": 9075 }, { "epoch": 0.320580690452958, "grad_norm": 1.6223686933517456, "learning_rate": 7.94461536823016e-06, "loss": 0.842, "step": 9076 }, { "epoch": 0.32061601225666586, "grad_norm": 1.0957446098327637, "learning_rate": 7.944153056815991e-06, "loss": 0.6038, "step": 9077 }, { "epoch": 0.32065133406037377, "grad_norm": 1.7123764753341675, "learning_rate": 7.9436907068693e-06, "loss": 0.7916, "step": 9078 }, { "epoch": 0.3206866558640817, "grad_norm": 2.075425863265991, "learning_rate": 7.94322831839614e-06, "loss": 0.833, "step": 9079 }, { "epoch": 0.3207219776677896, "grad_norm": 1.5888381004333496, "learning_rate": 7.94276589140256e-06, "loss": 0.8579, "step": 9080 }, { "epoch": 0.3207572994714975, "grad_norm": 0.9140039682388306, "learning_rate": 7.942303425894614e-06, "loss": 0.6015, "step": 9081 }, { "epoch": 0.3207926212752054, "grad_norm": 2.021268606185913, "learning_rate": 7.941840921878352e-06, "loss": 0.8462, "step": 9082 }, { "epoch": 0.3208279430789133, "grad_norm": 1.945953130722046, "learning_rate": 7.94137837935983e-06, "loss": 0.8312, "step": 9083 }, { "epoch": 0.32086326488262124, "grad_norm": 1.8818761110305786, "learning_rate": 7.940915798345102e-06, "loss": 0.8314, "step": 9084 }, { "epoch": 0.32089858668632915, "grad_norm": 1.618962287902832, "learning_rate": 7.94045317884022e-06, "loss": 0.79, "step": 9085 }, { "epoch": 0.32093390849003706, "grad_norm": 2.0707545280456543, "learning_rate": 7.939990520851238e-06, "loss": 0.8164, "step": 9086 }, { "epoch": 0.32096923029374497, "grad_norm": 1.66593599319458, "learning_rate": 7.939527824384214e-06, "loss": 0.8372, "step": 9087 }, { "epoch": 0.3210045520974529, "grad_norm": 1.560324788093567, "learning_rate": 7.939065089445201e-06, "loss": 0.7968, "step": 9088 }, { "epoch": 0.3210398739011608, "grad_norm": 1.9065433740615845, "learning_rate": 7.938602316040259e-06, "loss": 0.8507, "step": 9089 }, { "epoch": 0.32107519570486864, "grad_norm": 1.536770224571228, "learning_rate": 7.938139504175441e-06, "loss": 0.8236, "step": 9090 }, { "epoch": 0.32111051750857655, "grad_norm": 1.8449957370758057, "learning_rate": 7.937676653856805e-06, "loss": 0.8137, "step": 9091 }, { "epoch": 0.32114583931228446, "grad_norm": 1.7638928890228271, "learning_rate": 7.937213765090412e-06, "loss": 0.8282, "step": 9092 }, { "epoch": 0.3211811611159924, "grad_norm": 1.7657597064971924, "learning_rate": 7.936750837882315e-06, "loss": 0.8146, "step": 9093 }, { "epoch": 0.3212164829197003, "grad_norm": 1.661002278327942, "learning_rate": 7.936287872238574e-06, "loss": 0.8059, "step": 9094 }, { "epoch": 0.3212518047234082, "grad_norm": 1.7066388130187988, "learning_rate": 7.935824868165252e-06, "loss": 0.8276, "step": 9095 }, { "epoch": 0.3212871265271161, "grad_norm": 1.8133275508880615, "learning_rate": 7.935361825668403e-06, "loss": 0.8059, "step": 9096 }, { "epoch": 0.321322448330824, "grad_norm": 1.5212750434875488, "learning_rate": 7.934898744754092e-06, "loss": 0.807, "step": 9097 }, { "epoch": 0.32135777013453193, "grad_norm": 1.3763890266418457, "learning_rate": 7.934435625428378e-06, "loss": 0.7792, "step": 9098 }, { "epoch": 0.32139309193823984, "grad_norm": 1.6883920431137085, "learning_rate": 7.933972467697322e-06, "loss": 0.852, "step": 9099 }, { "epoch": 0.32142841374194775, "grad_norm": 1.1091080904006958, "learning_rate": 7.933509271566987e-06, "loss": 0.6115, "step": 9100 }, { "epoch": 0.32146373554565566, "grad_norm": 1.771403431892395, "learning_rate": 7.933046037043432e-06, "loss": 0.7772, "step": 9101 }, { "epoch": 0.32149905734936357, "grad_norm": 1.636918067932129, "learning_rate": 7.932582764132723e-06, "loss": 0.8066, "step": 9102 }, { "epoch": 0.3215343791530714, "grad_norm": 1.8076372146606445, "learning_rate": 7.932119452840922e-06, "loss": 0.7969, "step": 9103 }, { "epoch": 0.32156970095677934, "grad_norm": 1.5852631330490112, "learning_rate": 7.931656103174092e-06, "loss": 0.8112, "step": 9104 }, { "epoch": 0.32160502276048725, "grad_norm": 1.916111707687378, "learning_rate": 7.931192715138299e-06, "loss": 0.8398, "step": 9105 }, { "epoch": 0.32164034456419516, "grad_norm": 1.5943492650985718, "learning_rate": 7.930729288739606e-06, "loss": 0.82, "step": 9106 }, { "epoch": 0.32167566636790307, "grad_norm": 1.6431316137313843, "learning_rate": 7.93026582398408e-06, "loss": 0.8224, "step": 9107 }, { "epoch": 0.321710988171611, "grad_norm": 1.7894033193588257, "learning_rate": 7.929802320877783e-06, "loss": 0.8684, "step": 9108 }, { "epoch": 0.3217463099753189, "grad_norm": 1.7231857776641846, "learning_rate": 7.929338779426787e-06, "loss": 0.8277, "step": 9109 }, { "epoch": 0.3217816317790268, "grad_norm": 1.606754183769226, "learning_rate": 7.928875199637156e-06, "loss": 0.793, "step": 9110 }, { "epoch": 0.3218169535827347, "grad_norm": 1.596461296081543, "learning_rate": 7.928411581514954e-06, "loss": 0.805, "step": 9111 }, { "epoch": 0.3218522753864426, "grad_norm": 1.821795105934143, "learning_rate": 7.927947925066253e-06, "loss": 0.8681, "step": 9112 }, { "epoch": 0.32188759719015053, "grad_norm": 1.6832870244979858, "learning_rate": 7.927484230297121e-06, "loss": 0.8399, "step": 9113 }, { "epoch": 0.32192291899385844, "grad_norm": 1.7088632583618164, "learning_rate": 7.927020497213626e-06, "loss": 0.8364, "step": 9114 }, { "epoch": 0.32195824079756635, "grad_norm": 1.7936195135116577, "learning_rate": 7.926556725821834e-06, "loss": 0.8287, "step": 9115 }, { "epoch": 0.3219935626012742, "grad_norm": 1.79188072681427, "learning_rate": 7.926092916127822e-06, "loss": 0.8405, "step": 9116 }, { "epoch": 0.3220288844049821, "grad_norm": 1.6045386791229248, "learning_rate": 7.925629068137653e-06, "loss": 0.8607, "step": 9117 }, { "epoch": 0.32206420620869003, "grad_norm": 1.5150824785232544, "learning_rate": 7.9251651818574e-06, "loss": 0.7983, "step": 9118 }, { "epoch": 0.32209952801239794, "grad_norm": 1.5455288887023926, "learning_rate": 7.924701257293137e-06, "loss": 0.827, "step": 9119 }, { "epoch": 0.32213484981610585, "grad_norm": 1.8927862644195557, "learning_rate": 7.924237294450934e-06, "loss": 0.8656, "step": 9120 }, { "epoch": 0.32217017161981376, "grad_norm": 1.8709521293640137, "learning_rate": 7.92377329333686e-06, "loss": 0.8417, "step": 9121 }, { "epoch": 0.32220549342352167, "grad_norm": 1.7046386003494263, "learning_rate": 7.923309253956995e-06, "loss": 0.8362, "step": 9122 }, { "epoch": 0.3222408152272296, "grad_norm": 1.6034075021743774, "learning_rate": 7.922845176317404e-06, "loss": 0.8051, "step": 9123 }, { "epoch": 0.3222761370309375, "grad_norm": 1.7804440259933472, "learning_rate": 7.922381060424166e-06, "loss": 0.8595, "step": 9124 }, { "epoch": 0.3223114588346454, "grad_norm": 1.5642247200012207, "learning_rate": 7.921916906283356e-06, "loss": 0.7752, "step": 9125 }, { "epoch": 0.3223467806383533, "grad_norm": 2.0505428314208984, "learning_rate": 7.921452713901046e-06, "loss": 0.8376, "step": 9126 }, { "epoch": 0.3223821024420612, "grad_norm": 1.5948392152786255, "learning_rate": 7.92098848328331e-06, "loss": 0.8487, "step": 9127 }, { "epoch": 0.32241742424576914, "grad_norm": 1.7266935110092163, "learning_rate": 7.920524214436227e-06, "loss": 0.8296, "step": 9128 }, { "epoch": 0.322452746049477, "grad_norm": 1.4927040338516235, "learning_rate": 7.920059907365874e-06, "loss": 0.8351, "step": 9129 }, { "epoch": 0.3224880678531849, "grad_norm": 1.9577265977859497, "learning_rate": 7.919595562078324e-06, "loss": 0.8792, "step": 9130 }, { "epoch": 0.3225233896568928, "grad_norm": 1.5611627101898193, "learning_rate": 7.919131178579655e-06, "loss": 0.8251, "step": 9131 }, { "epoch": 0.3225587114606007, "grad_norm": 1.5840340852737427, "learning_rate": 7.918666756875947e-06, "loss": 0.8329, "step": 9132 }, { "epoch": 0.32259403326430863, "grad_norm": 1.5996460914611816, "learning_rate": 7.918202296973277e-06, "loss": 0.7758, "step": 9133 }, { "epoch": 0.32262935506801654, "grad_norm": 1.6467992067337036, "learning_rate": 7.917737798877725e-06, "loss": 0.7886, "step": 9134 }, { "epoch": 0.32266467687172445, "grad_norm": 1.754357933998108, "learning_rate": 7.917273262595368e-06, "loss": 0.818, "step": 9135 }, { "epoch": 0.32269999867543236, "grad_norm": 1.6394262313842773, "learning_rate": 7.916808688132287e-06, "loss": 0.8502, "step": 9136 }, { "epoch": 0.3227353204791403, "grad_norm": 1.7084324359893799, "learning_rate": 7.916344075494562e-06, "loss": 0.7857, "step": 9137 }, { "epoch": 0.3227706422828482, "grad_norm": 1.572545051574707, "learning_rate": 7.915879424688275e-06, "loss": 0.8553, "step": 9138 }, { "epoch": 0.3228059640865561, "grad_norm": 1.7493537664413452, "learning_rate": 7.915414735719506e-06, "loss": 0.8336, "step": 9139 }, { "epoch": 0.322841285890264, "grad_norm": 2.3657963275909424, "learning_rate": 7.914950008594336e-06, "loss": 0.8164, "step": 9140 }, { "epoch": 0.3228766076939719, "grad_norm": 1.5025155544281006, "learning_rate": 7.91448524331885e-06, "loss": 0.7789, "step": 9141 }, { "epoch": 0.3229119294976798, "grad_norm": 1.5395560264587402, "learning_rate": 7.914020439899126e-06, "loss": 0.8285, "step": 9142 }, { "epoch": 0.3229472513013877, "grad_norm": 1.8545366525650024, "learning_rate": 7.913555598341253e-06, "loss": 0.831, "step": 9143 }, { "epoch": 0.3229825731050956, "grad_norm": 1.7149085998535156, "learning_rate": 7.91309071865131e-06, "loss": 0.8188, "step": 9144 }, { "epoch": 0.3230178949088035, "grad_norm": 1.7628347873687744, "learning_rate": 7.912625800835385e-06, "loss": 0.8469, "step": 9145 }, { "epoch": 0.3230532167125114, "grad_norm": 1.9182758331298828, "learning_rate": 7.912160844899562e-06, "loss": 0.8065, "step": 9146 }, { "epoch": 0.3230885385162193, "grad_norm": 1.5991910696029663, "learning_rate": 7.911695850849924e-06, "loss": 0.8501, "step": 9147 }, { "epoch": 0.32312386031992724, "grad_norm": 1.5481284856796265, "learning_rate": 7.911230818692557e-06, "loss": 0.8594, "step": 9148 }, { "epoch": 0.32315918212363515, "grad_norm": 1.9490824937820435, "learning_rate": 7.910765748433548e-06, "loss": 0.848, "step": 9149 }, { "epoch": 0.32319450392734306, "grad_norm": 1.762237548828125, "learning_rate": 7.910300640078984e-06, "loss": 0.8247, "step": 9150 }, { "epoch": 0.32322982573105097, "grad_norm": 1.7025240659713745, "learning_rate": 7.909835493634954e-06, "loss": 0.7934, "step": 9151 }, { "epoch": 0.3232651475347589, "grad_norm": 1.7587486505508423, "learning_rate": 7.909370309107544e-06, "loss": 0.8242, "step": 9152 }, { "epoch": 0.3233004693384668, "grad_norm": 1.6277815103530884, "learning_rate": 7.908905086502842e-06, "loss": 0.8218, "step": 9153 }, { "epoch": 0.3233357911421747, "grad_norm": 1.6077629327774048, "learning_rate": 7.908439825826937e-06, "loss": 0.8052, "step": 9154 }, { "epoch": 0.32337111294588256, "grad_norm": 1.6461516618728638, "learning_rate": 7.907974527085918e-06, "loss": 0.8056, "step": 9155 }, { "epoch": 0.32340643474959047, "grad_norm": 1.644020438194275, "learning_rate": 7.907509190285875e-06, "loss": 0.8209, "step": 9156 }, { "epoch": 0.3234417565532984, "grad_norm": 1.4855120182037354, "learning_rate": 7.907043815432899e-06, "loss": 0.7813, "step": 9157 }, { "epoch": 0.3234770783570063, "grad_norm": 1.5485279560089111, "learning_rate": 7.906578402533079e-06, "loss": 0.8735, "step": 9158 }, { "epoch": 0.3235124001607142, "grad_norm": 1.557725429534912, "learning_rate": 7.906112951592507e-06, "loss": 0.7783, "step": 9159 }, { "epoch": 0.3235477219644221, "grad_norm": 1.8277184963226318, "learning_rate": 7.905647462617276e-06, "loss": 0.8281, "step": 9160 }, { "epoch": 0.32358304376813, "grad_norm": 1.6702800989151, "learning_rate": 7.905181935613478e-06, "loss": 0.8687, "step": 9161 }, { "epoch": 0.32361836557183793, "grad_norm": 1.5992742776870728, "learning_rate": 7.904716370587202e-06, "loss": 0.8155, "step": 9162 }, { "epoch": 0.32365368737554584, "grad_norm": 1.6039187908172607, "learning_rate": 7.904250767544546e-06, "loss": 0.8289, "step": 9163 }, { "epoch": 0.32368900917925375, "grad_norm": 1.7665774822235107, "learning_rate": 7.903785126491602e-06, "loss": 0.8213, "step": 9164 }, { "epoch": 0.32372433098296166, "grad_norm": 1.5898252725601196, "learning_rate": 7.903319447434465e-06, "loss": 0.7822, "step": 9165 }, { "epoch": 0.32375965278666957, "grad_norm": 1.5892138481140137, "learning_rate": 7.902853730379227e-06, "loss": 0.8022, "step": 9166 }, { "epoch": 0.3237949745903775, "grad_norm": 1.5890769958496094, "learning_rate": 7.902387975331986e-06, "loss": 0.7963, "step": 9167 }, { "epoch": 0.32383029639408534, "grad_norm": 1.6827279329299927, "learning_rate": 7.901922182298836e-06, "loss": 0.8261, "step": 9168 }, { "epoch": 0.32386561819779325, "grad_norm": 1.7773200273513794, "learning_rate": 7.901456351285875e-06, "loss": 0.8444, "step": 9169 }, { "epoch": 0.32390094000150116, "grad_norm": 1.6573344469070435, "learning_rate": 7.900990482299199e-06, "loss": 0.8357, "step": 9170 }, { "epoch": 0.32393626180520907, "grad_norm": 1.6044270992279053, "learning_rate": 7.900524575344904e-06, "loss": 0.8358, "step": 9171 }, { "epoch": 0.323971583608917, "grad_norm": 6.900084495544434, "learning_rate": 7.900058630429088e-06, "loss": 0.8106, "step": 9172 }, { "epoch": 0.3240069054126249, "grad_norm": 1.6444402933120728, "learning_rate": 7.89959264755785e-06, "loss": 0.875, "step": 9173 }, { "epoch": 0.3240422272163328, "grad_norm": 1.6239891052246094, "learning_rate": 7.899126626737289e-06, "loss": 0.8334, "step": 9174 }, { "epoch": 0.3240775490200407, "grad_norm": 1.8482578992843628, "learning_rate": 7.898660567973504e-06, "loss": 0.8322, "step": 9175 }, { "epoch": 0.3241128708237486, "grad_norm": 1.6015896797180176, "learning_rate": 7.898194471272593e-06, "loss": 0.7919, "step": 9176 }, { "epoch": 0.32414819262745653, "grad_norm": 1.4585903882980347, "learning_rate": 7.89772833664066e-06, "loss": 0.7924, "step": 9177 }, { "epoch": 0.32418351443116444, "grad_norm": 1.6472660303115845, "learning_rate": 7.897262164083801e-06, "loss": 0.8329, "step": 9178 }, { "epoch": 0.32421883623487235, "grad_norm": 1.760475993156433, "learning_rate": 7.89679595360812e-06, "loss": 0.8311, "step": 9179 }, { "epoch": 0.32425415803858026, "grad_norm": 1.6490967273712158, "learning_rate": 7.89632970521972e-06, "loss": 0.8374, "step": 9180 }, { "epoch": 0.3242894798422881, "grad_norm": 2.0984318256378174, "learning_rate": 7.895863418924698e-06, "loss": 0.8401, "step": 9181 }, { "epoch": 0.32432480164599603, "grad_norm": 2.1625351905822754, "learning_rate": 7.895397094729162e-06, "loss": 0.8382, "step": 9182 }, { "epoch": 0.32436012344970394, "grad_norm": 1.9072566032409668, "learning_rate": 7.894930732639211e-06, "loss": 0.8333, "step": 9183 }, { "epoch": 0.32439544525341185, "grad_norm": 1.6933034658432007, "learning_rate": 7.894464332660955e-06, "loss": 0.8617, "step": 9184 }, { "epoch": 0.32443076705711976, "grad_norm": 1.818608045578003, "learning_rate": 7.893997894800492e-06, "loss": 0.8169, "step": 9185 }, { "epoch": 0.3244660888608277, "grad_norm": 1.6662739515304565, "learning_rate": 7.893531419063926e-06, "loss": 0.8239, "step": 9186 }, { "epoch": 0.3245014106645356, "grad_norm": 1.7673672437667847, "learning_rate": 7.893064905457369e-06, "loss": 0.8263, "step": 9187 }, { "epoch": 0.3245367324682435, "grad_norm": 1.6944259405136108, "learning_rate": 7.89259835398692e-06, "loss": 0.8544, "step": 9188 }, { "epoch": 0.3245720542719514, "grad_norm": 1.8694249391555786, "learning_rate": 7.892131764658687e-06, "loss": 0.8538, "step": 9189 }, { "epoch": 0.3246073760756593, "grad_norm": 1.5960370302200317, "learning_rate": 7.891665137478777e-06, "loss": 0.8254, "step": 9190 }, { "epoch": 0.3246426978793672, "grad_norm": 1.5435012578964233, "learning_rate": 7.891198472453297e-06, "loss": 0.81, "step": 9191 }, { "epoch": 0.32467801968307514, "grad_norm": 1.5862618684768677, "learning_rate": 7.890731769588356e-06, "loss": 0.8099, "step": 9192 }, { "epoch": 0.32471334148678305, "grad_norm": 1.517032504081726, "learning_rate": 7.890265028890062e-06, "loss": 0.7962, "step": 9193 }, { "epoch": 0.3247486632904909, "grad_norm": 1.1451120376586914, "learning_rate": 7.889798250364521e-06, "loss": 0.6221, "step": 9194 }, { "epoch": 0.3247839850941988, "grad_norm": 1.8994357585906982, "learning_rate": 7.889331434017845e-06, "loss": 0.8625, "step": 9195 }, { "epoch": 0.3248193068979067, "grad_norm": 1.9415862560272217, "learning_rate": 7.88886457985614e-06, "loss": 0.8209, "step": 9196 }, { "epoch": 0.32485462870161463, "grad_norm": 0.937971293926239, "learning_rate": 7.88839768788552e-06, "loss": 0.6189, "step": 9197 }, { "epoch": 0.32488995050532254, "grad_norm": 1.7535250186920166, "learning_rate": 7.887930758112094e-06, "loss": 0.8422, "step": 9198 }, { "epoch": 0.32492527230903046, "grad_norm": 1.654344916343689, "learning_rate": 7.887463790541973e-06, "loss": 0.8272, "step": 9199 }, { "epoch": 0.32496059411273837, "grad_norm": 1.7133234739303589, "learning_rate": 7.886996785181268e-06, "loss": 0.8652, "step": 9200 }, { "epoch": 0.3249959159164463, "grad_norm": 1.6371809244155884, "learning_rate": 7.886529742036092e-06, "loss": 0.8496, "step": 9201 }, { "epoch": 0.3250312377201542, "grad_norm": 1.5895860195159912, "learning_rate": 7.88606266111256e-06, "loss": 0.7925, "step": 9202 }, { "epoch": 0.3250665595238621, "grad_norm": 1.7661558389663696, "learning_rate": 7.88559554241678e-06, "loss": 0.8478, "step": 9203 }, { "epoch": 0.32510188132757, "grad_norm": 1.4765346050262451, "learning_rate": 7.885128385954867e-06, "loss": 0.7852, "step": 9204 }, { "epoch": 0.3251372031312779, "grad_norm": 1.6189378499984741, "learning_rate": 7.884661191732937e-06, "loss": 0.8103, "step": 9205 }, { "epoch": 0.32517252493498583, "grad_norm": 1.7600663900375366, "learning_rate": 7.884193959757106e-06, "loss": 0.7976, "step": 9206 }, { "epoch": 0.3252078467386937, "grad_norm": 1.6895185708999634, "learning_rate": 7.883726690033481e-06, "loss": 0.8243, "step": 9207 }, { "epoch": 0.3252431685424016, "grad_norm": 1.587902307510376, "learning_rate": 7.883259382568188e-06, "loss": 0.8442, "step": 9208 }, { "epoch": 0.3252784903461095, "grad_norm": 1.5150161981582642, "learning_rate": 7.882792037367338e-06, "loss": 0.8409, "step": 9209 }, { "epoch": 0.3253138121498174, "grad_norm": 1.4886952638626099, "learning_rate": 7.882324654437045e-06, "loss": 0.7835, "step": 9210 }, { "epoch": 0.3253491339535253, "grad_norm": 1.554153561592102, "learning_rate": 7.88185723378343e-06, "loss": 0.8083, "step": 9211 }, { "epoch": 0.32538445575723324, "grad_norm": 1.6580833196640015, "learning_rate": 7.88138977541261e-06, "loss": 0.8132, "step": 9212 }, { "epoch": 0.32541977756094115, "grad_norm": 1.767856478691101, "learning_rate": 7.880922279330701e-06, "loss": 0.8239, "step": 9213 }, { "epoch": 0.32545509936464906, "grad_norm": 2.2986419200897217, "learning_rate": 7.880454745543824e-06, "loss": 0.7981, "step": 9214 }, { "epoch": 0.32549042116835697, "grad_norm": 1.8930280208587646, "learning_rate": 7.879987174058097e-06, "loss": 0.7958, "step": 9215 }, { "epoch": 0.3255257429720649, "grad_norm": 1.8328126668930054, "learning_rate": 7.879519564879639e-06, "loss": 0.8598, "step": 9216 }, { "epoch": 0.3255610647757728, "grad_norm": 1.6573647260665894, "learning_rate": 7.879051918014569e-06, "loss": 0.7952, "step": 9217 }, { "epoch": 0.3255963865794807, "grad_norm": 1.7320282459259033, "learning_rate": 7.878584233469008e-06, "loss": 0.8652, "step": 9218 }, { "epoch": 0.3256317083831886, "grad_norm": 1.5504118204116821, "learning_rate": 7.87811651124908e-06, "loss": 0.865, "step": 9219 }, { "epoch": 0.32566703018689647, "grad_norm": 1.722579002380371, "learning_rate": 7.877648751360903e-06, "loss": 0.8034, "step": 9220 }, { "epoch": 0.3257023519906044, "grad_norm": 1.6671770811080933, "learning_rate": 7.877180953810602e-06, "loss": 0.8189, "step": 9221 }, { "epoch": 0.3257376737943123, "grad_norm": 2.430872678756714, "learning_rate": 7.876713118604295e-06, "loss": 0.823, "step": 9222 }, { "epoch": 0.3257729955980202, "grad_norm": 1.714864730834961, "learning_rate": 7.87624524574811e-06, "loss": 0.8483, "step": 9223 }, { "epoch": 0.3258083174017281, "grad_norm": 1.7140718698501587, "learning_rate": 7.875777335248165e-06, "loss": 0.7991, "step": 9224 }, { "epoch": 0.325843639205436, "grad_norm": 1.6968032121658325, "learning_rate": 7.875309387110586e-06, "loss": 0.8085, "step": 9225 }, { "epoch": 0.32587896100914393, "grad_norm": 1.6721959114074707, "learning_rate": 7.874841401341502e-06, "loss": 0.8123, "step": 9226 }, { "epoch": 0.32591428281285184, "grad_norm": 1.7769020795822144, "learning_rate": 7.874373377947031e-06, "loss": 0.8245, "step": 9227 }, { "epoch": 0.32594960461655975, "grad_norm": 1.562134027481079, "learning_rate": 7.873905316933303e-06, "loss": 0.8265, "step": 9228 }, { "epoch": 0.32598492642026766, "grad_norm": 1.7692362070083618, "learning_rate": 7.873437218306443e-06, "loss": 0.825, "step": 9229 }, { "epoch": 0.3260202482239756, "grad_norm": 1.5861798524856567, "learning_rate": 7.872969082072577e-06, "loss": 0.801, "step": 9230 }, { "epoch": 0.3260555700276835, "grad_norm": 1.7720788717269897, "learning_rate": 7.87250090823783e-06, "loss": 0.8662, "step": 9231 }, { "epoch": 0.3260908918313914, "grad_norm": 1.655790090560913, "learning_rate": 7.872032696808333e-06, "loss": 0.8305, "step": 9232 }, { "epoch": 0.32612621363509925, "grad_norm": 1.6565333604812622, "learning_rate": 7.87156444779021e-06, "loss": 0.8668, "step": 9233 }, { "epoch": 0.32616153543880716, "grad_norm": 1.6345226764678955, "learning_rate": 7.871096161189593e-06, "loss": 0.799, "step": 9234 }, { "epoch": 0.32619685724251507, "grad_norm": 1.5840870141983032, "learning_rate": 7.870627837012608e-06, "loss": 0.7994, "step": 9235 }, { "epoch": 0.326232179046223, "grad_norm": 1.542534589767456, "learning_rate": 7.870159475265387e-06, "loss": 0.8197, "step": 9236 }, { "epoch": 0.3262675008499309, "grad_norm": 1.654679298400879, "learning_rate": 7.869691075954055e-06, "loss": 0.8434, "step": 9237 }, { "epoch": 0.3263028226536388, "grad_norm": 1.7704060077667236, "learning_rate": 7.869222639084749e-06, "loss": 0.8451, "step": 9238 }, { "epoch": 0.3263381444573467, "grad_norm": 1.6573222875595093, "learning_rate": 7.868754164663595e-06, "loss": 0.8417, "step": 9239 }, { "epoch": 0.3263734662610546, "grad_norm": 1.5873496532440186, "learning_rate": 7.868285652696728e-06, "loss": 0.8057, "step": 9240 }, { "epoch": 0.32640878806476253, "grad_norm": 1.6966638565063477, "learning_rate": 7.867817103190274e-06, "loss": 0.81, "step": 9241 }, { "epoch": 0.32644410986847044, "grad_norm": 1.602830410003662, "learning_rate": 7.86734851615037e-06, "loss": 0.8569, "step": 9242 }, { "epoch": 0.32647943167217836, "grad_norm": 1.5767172574996948, "learning_rate": 7.866879891583148e-06, "loss": 0.8604, "step": 9243 }, { "epoch": 0.32651475347588627, "grad_norm": 1.6625984907150269, "learning_rate": 7.86641122949474e-06, "loss": 0.8318, "step": 9244 }, { "epoch": 0.3265500752795942, "grad_norm": 1.7500603199005127, "learning_rate": 7.865942529891284e-06, "loss": 0.8284, "step": 9245 }, { "epoch": 0.32658539708330203, "grad_norm": 1.7607859373092651, "learning_rate": 7.865473792778909e-06, "loss": 0.8421, "step": 9246 }, { "epoch": 0.32662071888700994, "grad_norm": 1.5582380294799805, "learning_rate": 7.865005018163751e-06, "loss": 0.8264, "step": 9247 }, { "epoch": 0.32665604069071785, "grad_norm": 1.5833114385604858, "learning_rate": 7.864536206051944e-06, "loss": 0.8433, "step": 9248 }, { "epoch": 0.32669136249442576, "grad_norm": 1.7921240329742432, "learning_rate": 7.86406735644963e-06, "loss": 0.8435, "step": 9249 }, { "epoch": 0.3267266842981337, "grad_norm": 1.6432913541793823, "learning_rate": 7.863598469362938e-06, "loss": 0.8406, "step": 9250 }, { "epoch": 0.3267620061018416, "grad_norm": 1.7737503051757812, "learning_rate": 7.863129544798007e-06, "loss": 0.8274, "step": 9251 }, { "epoch": 0.3267973279055495, "grad_norm": 1.946282148361206, "learning_rate": 7.862660582760977e-06, "loss": 0.8161, "step": 9252 }, { "epoch": 0.3268326497092574, "grad_norm": 1.6072168350219727, "learning_rate": 7.862191583257983e-06, "loss": 0.7983, "step": 9253 }, { "epoch": 0.3268679715129653, "grad_norm": 1.8011668920516968, "learning_rate": 7.861722546295161e-06, "loss": 0.8382, "step": 9254 }, { "epoch": 0.3269032933166732, "grad_norm": 1.6918730735778809, "learning_rate": 7.861253471878655e-06, "loss": 0.821, "step": 9255 }, { "epoch": 0.32693861512038114, "grad_norm": 1.7929574251174927, "learning_rate": 7.860784360014599e-06, "loss": 0.8222, "step": 9256 }, { "epoch": 0.32697393692408905, "grad_norm": 1.8617513179779053, "learning_rate": 7.860315210709137e-06, "loss": 0.8471, "step": 9257 }, { "epoch": 0.32700925872779696, "grad_norm": 1.7040371894836426, "learning_rate": 7.859846023968406e-06, "loss": 0.8699, "step": 9258 }, { "epoch": 0.3270445805315048, "grad_norm": 1.907950758934021, "learning_rate": 7.85937679979855e-06, "loss": 0.7981, "step": 9259 }, { "epoch": 0.3270799023352127, "grad_norm": 1.667755365371704, "learning_rate": 7.858907538205706e-06, "loss": 0.8307, "step": 9260 }, { "epoch": 0.32711522413892063, "grad_norm": 1.6956766843795776, "learning_rate": 7.858438239196019e-06, "loss": 0.8104, "step": 9261 }, { "epoch": 0.32715054594262855, "grad_norm": 1.7309221029281616, "learning_rate": 7.857968902775627e-06, "loss": 0.8173, "step": 9262 }, { "epoch": 0.32718586774633646, "grad_norm": 2.049401044845581, "learning_rate": 7.857499528950679e-06, "loss": 0.8683, "step": 9263 }, { "epoch": 0.32722118955004437, "grad_norm": 1.7700715065002441, "learning_rate": 7.857030117727312e-06, "loss": 0.8343, "step": 9264 }, { "epoch": 0.3272565113537523, "grad_norm": 1.6747087240219116, "learning_rate": 7.856560669111672e-06, "loss": 0.8385, "step": 9265 }, { "epoch": 0.3272918331574602, "grad_norm": 1.644477128982544, "learning_rate": 7.856091183109902e-06, "loss": 0.8218, "step": 9266 }, { "epoch": 0.3273271549611681, "grad_norm": 1.6462385654449463, "learning_rate": 7.855621659728149e-06, "loss": 0.8169, "step": 9267 }, { "epoch": 0.327362476764876, "grad_norm": 1.7058478593826294, "learning_rate": 7.855152098972557e-06, "loss": 0.8321, "step": 9268 }, { "epoch": 0.3273977985685839, "grad_norm": 1.8850266933441162, "learning_rate": 7.85468250084927e-06, "loss": 0.8273, "step": 9269 }, { "epoch": 0.32743312037229183, "grad_norm": 1.8289836645126343, "learning_rate": 7.854212865364434e-06, "loss": 0.871, "step": 9270 }, { "epoch": 0.32746844217599974, "grad_norm": 1.903954267501831, "learning_rate": 7.853743192524199e-06, "loss": 0.7981, "step": 9271 }, { "epoch": 0.3275037639797076, "grad_norm": 1.7127740383148193, "learning_rate": 7.853273482334708e-06, "loss": 0.8561, "step": 9272 }, { "epoch": 0.3275390857834155, "grad_norm": 1.6069220304489136, "learning_rate": 7.85280373480211e-06, "loss": 0.7846, "step": 9273 }, { "epoch": 0.3275744075871234, "grad_norm": 1.6695101261138916, "learning_rate": 7.852333949932554e-06, "loss": 0.8066, "step": 9274 }, { "epoch": 0.32760972939083133, "grad_norm": 1.6546709537506104, "learning_rate": 7.851864127732186e-06, "loss": 0.8234, "step": 9275 }, { "epoch": 0.32764505119453924, "grad_norm": 1.54889976978302, "learning_rate": 7.851394268207158e-06, "loss": 0.8026, "step": 9276 }, { "epoch": 0.32768037299824715, "grad_norm": 1.6111228466033936, "learning_rate": 7.850924371363617e-06, "loss": 0.8256, "step": 9277 }, { "epoch": 0.32771569480195506, "grad_norm": 2.1245269775390625, "learning_rate": 7.850454437207712e-06, "loss": 0.8154, "step": 9278 }, { "epoch": 0.32775101660566297, "grad_norm": 1.778504490852356, "learning_rate": 7.849984465745598e-06, "loss": 0.8184, "step": 9279 }, { "epoch": 0.3277863384093709, "grad_norm": 1.6862932443618774, "learning_rate": 7.849514456983423e-06, "loss": 0.8176, "step": 9280 }, { "epoch": 0.3278216602130788, "grad_norm": 1.6487606763839722, "learning_rate": 7.849044410927337e-06, "loss": 0.8392, "step": 9281 }, { "epoch": 0.3278569820167867, "grad_norm": 2.039669990539551, "learning_rate": 7.848574327583494e-06, "loss": 0.8332, "step": 9282 }, { "epoch": 0.3278923038204946, "grad_norm": 1.5682384967803955, "learning_rate": 7.848104206958045e-06, "loss": 0.8611, "step": 9283 }, { "epoch": 0.3279276256242025, "grad_norm": 1.7466880083084106, "learning_rate": 7.847634049057142e-06, "loss": 0.8215, "step": 9284 }, { "epoch": 0.32796294742791043, "grad_norm": 1.5098252296447754, "learning_rate": 7.847163853886944e-06, "loss": 0.7816, "step": 9285 }, { "epoch": 0.3279982692316183, "grad_norm": 2.047985792160034, "learning_rate": 7.846693621453599e-06, "loss": 0.8354, "step": 9286 }, { "epoch": 0.3280335910353262, "grad_norm": 1.5444153547286987, "learning_rate": 7.84622335176326e-06, "loss": 0.8201, "step": 9287 }, { "epoch": 0.3280689128390341, "grad_norm": 1.6536341905593872, "learning_rate": 7.845753044822089e-06, "loss": 0.8087, "step": 9288 }, { "epoch": 0.328104234642742, "grad_norm": 1.5786242485046387, "learning_rate": 7.845282700636235e-06, "loss": 0.8138, "step": 9289 }, { "epoch": 0.32813955644644993, "grad_norm": 1.527495265007019, "learning_rate": 7.844812319211855e-06, "loss": 0.8136, "step": 9290 }, { "epoch": 0.32817487825015784, "grad_norm": 1.6739051342010498, "learning_rate": 7.844341900555109e-06, "loss": 0.8071, "step": 9291 }, { "epoch": 0.32821020005386575, "grad_norm": 1.6106557846069336, "learning_rate": 7.843871444672148e-06, "loss": 0.8347, "step": 9292 }, { "epoch": 0.32824552185757366, "grad_norm": 1.590615153312683, "learning_rate": 7.843400951569134e-06, "loss": 0.8253, "step": 9293 }, { "epoch": 0.3282808436612816, "grad_norm": 1.8063006401062012, "learning_rate": 7.842930421252222e-06, "loss": 0.8188, "step": 9294 }, { "epoch": 0.3283161654649895, "grad_norm": 1.734635353088379, "learning_rate": 7.842459853727571e-06, "loss": 0.8339, "step": 9295 }, { "epoch": 0.3283514872686974, "grad_norm": 1.8926125764846802, "learning_rate": 7.841989249001339e-06, "loss": 0.8343, "step": 9296 }, { "epoch": 0.3283868090724053, "grad_norm": 1.6121852397918701, "learning_rate": 7.841518607079688e-06, "loss": 0.8255, "step": 9297 }, { "epoch": 0.3284221308761132, "grad_norm": 4.964745044708252, "learning_rate": 7.841047927968775e-06, "loss": 0.8844, "step": 9298 }, { "epoch": 0.32845745267982107, "grad_norm": 1.766539216041565, "learning_rate": 7.84057721167476e-06, "loss": 0.8714, "step": 9299 }, { "epoch": 0.328492774483529, "grad_norm": 1.6297881603240967, "learning_rate": 7.840106458203804e-06, "loss": 0.814, "step": 9300 }, { "epoch": 0.3285280962872369, "grad_norm": 1.5585616827011108, "learning_rate": 7.83963566756207e-06, "loss": 0.7938, "step": 9301 }, { "epoch": 0.3285634180909448, "grad_norm": 1.7551196813583374, "learning_rate": 7.839164839755717e-06, "loss": 0.7976, "step": 9302 }, { "epoch": 0.3285987398946527, "grad_norm": 1.7274397611618042, "learning_rate": 7.838693974790907e-06, "loss": 0.8173, "step": 9303 }, { "epoch": 0.3286340616983606, "grad_norm": 1.6165646314620972, "learning_rate": 7.838223072673808e-06, "loss": 0.8385, "step": 9304 }, { "epoch": 0.32866938350206854, "grad_norm": 1.6860276460647583, "learning_rate": 7.837752133410576e-06, "loss": 0.8498, "step": 9305 }, { "epoch": 0.32870470530577645, "grad_norm": 1.8445477485656738, "learning_rate": 7.837281157007377e-06, "loss": 0.8632, "step": 9306 }, { "epoch": 0.32874002710948436, "grad_norm": 1.637772560119629, "learning_rate": 7.836810143470376e-06, "loss": 0.8978, "step": 9307 }, { "epoch": 0.32877534891319227, "grad_norm": 1.7372314929962158, "learning_rate": 7.836339092805739e-06, "loss": 0.8059, "step": 9308 }, { "epoch": 0.3288106707169002, "grad_norm": 1.5016831159591675, "learning_rate": 7.835868005019627e-06, "loss": 0.8039, "step": 9309 }, { "epoch": 0.3288459925206081, "grad_norm": 1.8688629865646362, "learning_rate": 7.835396880118209e-06, "loss": 0.8483, "step": 9310 }, { "epoch": 0.328881314324316, "grad_norm": 2.003051519393921, "learning_rate": 7.834925718107648e-06, "loss": 0.848, "step": 9311 }, { "epoch": 0.32891663612802385, "grad_norm": 2.2575085163116455, "learning_rate": 7.834454518994114e-06, "loss": 0.8259, "step": 9312 }, { "epoch": 0.32895195793173176, "grad_norm": 2.013338565826416, "learning_rate": 7.833983282783772e-06, "loss": 0.8589, "step": 9313 }, { "epoch": 0.3289872797354397, "grad_norm": 1.6510058641433716, "learning_rate": 7.83351200948279e-06, "loss": 0.8162, "step": 9314 }, { "epoch": 0.3290226015391476, "grad_norm": 1.6451058387756348, "learning_rate": 7.833040699097334e-06, "loss": 0.8195, "step": 9315 }, { "epoch": 0.3290579233428555, "grad_norm": 1.8855077028274536, "learning_rate": 7.832569351633574e-06, "loss": 0.7734, "step": 9316 }, { "epoch": 0.3290932451465634, "grad_norm": 1.9229120016098022, "learning_rate": 7.832097967097678e-06, "loss": 0.8042, "step": 9317 }, { "epoch": 0.3291285669502713, "grad_norm": 1.6651417016983032, "learning_rate": 7.831626545495819e-06, "loss": 0.8212, "step": 9318 }, { "epoch": 0.32916388875397923, "grad_norm": 1.6206027269363403, "learning_rate": 7.831155086834161e-06, "loss": 0.7854, "step": 9319 }, { "epoch": 0.32919921055768714, "grad_norm": 1.1380162239074707, "learning_rate": 7.830683591118879e-06, "loss": 0.6061, "step": 9320 }, { "epoch": 0.32923453236139505, "grad_norm": 1.7167402505874634, "learning_rate": 7.830212058356141e-06, "loss": 0.8427, "step": 9321 }, { "epoch": 0.32926985416510296, "grad_norm": 1.9600030183792114, "learning_rate": 7.829740488552122e-06, "loss": 0.8566, "step": 9322 }, { "epoch": 0.32930517596881087, "grad_norm": 1.5232765674591064, "learning_rate": 7.82926888171299e-06, "loss": 0.8253, "step": 9323 }, { "epoch": 0.3293404977725188, "grad_norm": 1.845521330833435, "learning_rate": 7.828797237844919e-06, "loss": 0.8778, "step": 9324 }, { "epoch": 0.32937581957622664, "grad_norm": 1.600428581237793, "learning_rate": 7.82832555695408e-06, "loss": 0.82, "step": 9325 }, { "epoch": 0.32941114137993455, "grad_norm": 1.7467551231384277, "learning_rate": 7.827853839046648e-06, "loss": 0.8227, "step": 9326 }, { "epoch": 0.32944646318364246, "grad_norm": 2.0077733993530273, "learning_rate": 7.827382084128798e-06, "loss": 0.8498, "step": 9327 }, { "epoch": 0.32948178498735037, "grad_norm": 0.9804145097732544, "learning_rate": 7.826910292206702e-06, "loss": 0.5877, "step": 9328 }, { "epoch": 0.3295171067910583, "grad_norm": 2.036123037338257, "learning_rate": 7.826438463286536e-06, "loss": 0.8064, "step": 9329 }, { "epoch": 0.3295524285947662, "grad_norm": 1.6952766180038452, "learning_rate": 7.825966597374475e-06, "loss": 0.8235, "step": 9330 }, { "epoch": 0.3295877503984741, "grad_norm": 1.726871132850647, "learning_rate": 7.825494694476694e-06, "loss": 0.8523, "step": 9331 }, { "epoch": 0.329623072202182, "grad_norm": 1.9726725816726685, "learning_rate": 7.82502275459937e-06, "loss": 0.8499, "step": 9332 }, { "epoch": 0.3296583940058899, "grad_norm": 1.5498915910720825, "learning_rate": 7.824550777748679e-06, "loss": 0.8211, "step": 9333 }, { "epoch": 0.32969371580959783, "grad_norm": 1.7599645853042603, "learning_rate": 7.824078763930797e-06, "loss": 0.8287, "step": 9334 }, { "epoch": 0.32972903761330574, "grad_norm": 1.8432139158248901, "learning_rate": 7.823606713151904e-06, "loss": 0.8649, "step": 9335 }, { "epoch": 0.32976435941701365, "grad_norm": 1.9618785381317139, "learning_rate": 7.823134625418177e-06, "loss": 0.8116, "step": 9336 }, { "epoch": 0.32979968122072156, "grad_norm": 2.0124502182006836, "learning_rate": 7.822662500735795e-06, "loss": 0.8232, "step": 9337 }, { "epoch": 0.3298350030244294, "grad_norm": 1.6300820112228394, "learning_rate": 7.822190339110937e-06, "loss": 0.8145, "step": 9338 }, { "epoch": 0.32987032482813733, "grad_norm": 1.5334327220916748, "learning_rate": 7.82171814054978e-06, "loss": 0.8484, "step": 9339 }, { "epoch": 0.32990564663184524, "grad_norm": 1.5880773067474365, "learning_rate": 7.82124590505851e-06, "loss": 0.8147, "step": 9340 }, { "epoch": 0.32994096843555315, "grad_norm": 1.619101643562317, "learning_rate": 7.820773632643304e-06, "loss": 0.7802, "step": 9341 }, { "epoch": 0.32997629023926106, "grad_norm": 1.5536149740219116, "learning_rate": 7.82030132331034e-06, "loss": 0.8296, "step": 9342 }, { "epoch": 0.33001161204296897, "grad_norm": 1.065030813217163, "learning_rate": 7.819828977065804e-06, "loss": 0.6245, "step": 9343 }, { "epoch": 0.3300469338466769, "grad_norm": 1.5456688404083252, "learning_rate": 7.819356593915877e-06, "loss": 0.8285, "step": 9344 }, { "epoch": 0.3300822556503848, "grad_norm": 1.5637869834899902, "learning_rate": 7.818884173866738e-06, "loss": 0.7846, "step": 9345 }, { "epoch": 0.3301175774540927, "grad_norm": 1.660169005393982, "learning_rate": 7.818411716924575e-06, "loss": 0.8227, "step": 9346 }, { "epoch": 0.3301528992578006, "grad_norm": 1.5587364435195923, "learning_rate": 7.81793922309557e-06, "loss": 0.8024, "step": 9347 }, { "epoch": 0.3301882210615085, "grad_norm": 1.716485857963562, "learning_rate": 7.817466692385905e-06, "loss": 0.8172, "step": 9348 }, { "epoch": 0.33022354286521644, "grad_norm": 1.6417702436447144, "learning_rate": 7.816994124801765e-06, "loss": 0.8323, "step": 9349 }, { "epoch": 0.33025886466892435, "grad_norm": 1.5997289419174194, "learning_rate": 7.816521520349339e-06, "loss": 0.8488, "step": 9350 }, { "epoch": 0.3302941864726322, "grad_norm": 1.7149133682250977, "learning_rate": 7.816048879034805e-06, "loss": 0.8112, "step": 9351 }, { "epoch": 0.3303295082763401, "grad_norm": 1.6329777240753174, "learning_rate": 7.815576200864354e-06, "loss": 0.8439, "step": 9352 }, { "epoch": 0.330364830080048, "grad_norm": 1.78536856174469, "learning_rate": 7.81510348584417e-06, "loss": 0.8179, "step": 9353 }, { "epoch": 0.33040015188375593, "grad_norm": 1.6250050067901611, "learning_rate": 7.814630733980442e-06, "loss": 0.8662, "step": 9354 }, { "epoch": 0.33043547368746384, "grad_norm": 1.5689650774002075, "learning_rate": 7.814157945279355e-06, "loss": 0.7916, "step": 9355 }, { "epoch": 0.33047079549117175, "grad_norm": 1.6629060506820679, "learning_rate": 7.8136851197471e-06, "loss": 0.8468, "step": 9356 }, { "epoch": 0.33050611729487966, "grad_norm": 1.6513444185256958, "learning_rate": 7.81321225738986e-06, "loss": 0.8424, "step": 9357 }, { "epoch": 0.3305414390985876, "grad_norm": 1.5737552642822266, "learning_rate": 7.812739358213827e-06, "loss": 0.8412, "step": 9358 }, { "epoch": 0.3305767609022955, "grad_norm": 2.0353193283081055, "learning_rate": 7.812266422225192e-06, "loss": 0.8115, "step": 9359 }, { "epoch": 0.3306120827060034, "grad_norm": 1.5762137174606323, "learning_rate": 7.811793449430142e-06, "loss": 0.8373, "step": 9360 }, { "epoch": 0.3306474045097113, "grad_norm": 1.7028316259384155, "learning_rate": 7.811320439834867e-06, "loss": 0.8369, "step": 9361 }, { "epoch": 0.3306827263134192, "grad_norm": 1.743740439414978, "learning_rate": 7.810847393445559e-06, "loss": 0.9017, "step": 9362 }, { "epoch": 0.33071804811712713, "grad_norm": 1.6665678024291992, "learning_rate": 7.810374310268409e-06, "loss": 0.8287, "step": 9363 }, { "epoch": 0.330753369920835, "grad_norm": 1.6887987852096558, "learning_rate": 7.809901190309608e-06, "loss": 0.8445, "step": 9364 }, { "epoch": 0.3307886917245429, "grad_norm": 1.6999658346176147, "learning_rate": 7.809428033575351e-06, "loss": 0.833, "step": 9365 }, { "epoch": 0.3308240135282508, "grad_norm": 1.8185757398605347, "learning_rate": 7.808954840071824e-06, "loss": 0.8305, "step": 9366 }, { "epoch": 0.3308593353319587, "grad_norm": 1.8087108135223389, "learning_rate": 7.808481609805225e-06, "loss": 0.8171, "step": 9367 }, { "epoch": 0.3308946571356666, "grad_norm": 1.6596214771270752, "learning_rate": 7.80800834278175e-06, "loss": 0.8035, "step": 9368 }, { "epoch": 0.33092997893937454, "grad_norm": 1.643808126449585, "learning_rate": 7.807535039007587e-06, "loss": 0.8275, "step": 9369 }, { "epoch": 0.33096530074308245, "grad_norm": 1.592720627784729, "learning_rate": 7.807061698488932e-06, "loss": 0.807, "step": 9370 }, { "epoch": 0.33100062254679036, "grad_norm": 1.6668528318405151, "learning_rate": 7.806588321231983e-06, "loss": 0.8391, "step": 9371 }, { "epoch": 0.33103594435049827, "grad_norm": 1.8255048990249634, "learning_rate": 7.806114907242932e-06, "loss": 0.8626, "step": 9372 }, { "epoch": 0.3310712661542062, "grad_norm": 1.6569873094558716, "learning_rate": 7.80564145652798e-06, "loss": 0.7966, "step": 9373 }, { "epoch": 0.3311065879579141, "grad_norm": 1.7748112678527832, "learning_rate": 7.805167969093316e-06, "loss": 0.8314, "step": 9374 }, { "epoch": 0.331141909761622, "grad_norm": 1.7912743091583252, "learning_rate": 7.804694444945142e-06, "loss": 0.8616, "step": 9375 }, { "epoch": 0.3311772315653299, "grad_norm": 1.6443454027175903, "learning_rate": 7.804220884089655e-06, "loss": 0.8298, "step": 9376 }, { "epoch": 0.33121255336903777, "grad_norm": 1.9980151653289795, "learning_rate": 7.803747286533052e-06, "loss": 0.8021, "step": 9377 }, { "epoch": 0.3312478751727457, "grad_norm": 1.6866281032562256, "learning_rate": 7.803273652281533e-06, "loss": 0.8207, "step": 9378 }, { "epoch": 0.3312831969764536, "grad_norm": 1.6195902824401855, "learning_rate": 7.802799981341293e-06, "loss": 0.8103, "step": 9379 }, { "epoch": 0.3313185187801615, "grad_norm": 1.590367317199707, "learning_rate": 7.802326273718535e-06, "loss": 0.8275, "step": 9380 }, { "epoch": 0.3313538405838694, "grad_norm": 1.704537272453308, "learning_rate": 7.801852529419457e-06, "loss": 0.8411, "step": 9381 }, { "epoch": 0.3313891623875773, "grad_norm": 1.6432677507400513, "learning_rate": 7.801378748450261e-06, "loss": 0.8035, "step": 9382 }, { "epoch": 0.33142448419128523, "grad_norm": 1.6612013578414917, "learning_rate": 7.800904930817146e-06, "loss": 0.8172, "step": 9383 }, { "epoch": 0.33145980599499314, "grad_norm": 2.2100882530212402, "learning_rate": 7.800431076526314e-06, "loss": 0.8218, "step": 9384 }, { "epoch": 0.33149512779870105, "grad_norm": 1.5513317584991455, "learning_rate": 7.799957185583966e-06, "loss": 0.8474, "step": 9385 }, { "epoch": 0.33153044960240896, "grad_norm": 1.6827830076217651, "learning_rate": 7.799483257996302e-06, "loss": 0.8469, "step": 9386 }, { "epoch": 0.33156577140611687, "grad_norm": 1.6621501445770264, "learning_rate": 7.799009293769533e-06, "loss": 0.8117, "step": 9387 }, { "epoch": 0.3316010932098248, "grad_norm": 1.7065942287445068, "learning_rate": 7.798535292909852e-06, "loss": 0.8268, "step": 9388 }, { "epoch": 0.3316364150135327, "grad_norm": 1.583726167678833, "learning_rate": 7.798061255423469e-06, "loss": 0.847, "step": 9389 }, { "epoch": 0.33167173681724055, "grad_norm": 1.6647199392318726, "learning_rate": 7.797587181316587e-06, "loss": 0.8232, "step": 9390 }, { "epoch": 0.33170705862094846, "grad_norm": 1.7513614892959595, "learning_rate": 7.797113070595408e-06, "loss": 0.8661, "step": 9391 }, { "epoch": 0.33174238042465637, "grad_norm": 1.6094592809677124, "learning_rate": 7.79663892326614e-06, "loss": 0.8453, "step": 9392 }, { "epoch": 0.3317777022283643, "grad_norm": 1.1903213262557983, "learning_rate": 7.796164739334986e-06, "loss": 0.6175, "step": 9393 }, { "epoch": 0.3318130240320722, "grad_norm": 1.6916897296905518, "learning_rate": 7.795690518808156e-06, "loss": 0.794, "step": 9394 }, { "epoch": 0.3318483458357801, "grad_norm": 1.6334800720214844, "learning_rate": 7.795216261691852e-06, "loss": 0.8111, "step": 9395 }, { "epoch": 0.331883667639488, "grad_norm": 1.8782471418380737, "learning_rate": 7.794741967992283e-06, "loss": 0.8173, "step": 9396 }, { "epoch": 0.3319189894431959, "grad_norm": 1.6310421228408813, "learning_rate": 7.794267637715658e-06, "loss": 0.8136, "step": 9397 }, { "epoch": 0.33195431124690383, "grad_norm": 1.9209810495376587, "learning_rate": 7.793793270868184e-06, "loss": 0.8073, "step": 9398 }, { "epoch": 0.33198963305061174, "grad_norm": 1.6591249704360962, "learning_rate": 7.793318867456067e-06, "loss": 0.7797, "step": 9399 }, { "epoch": 0.33202495485431965, "grad_norm": 1.793268084526062, "learning_rate": 7.792844427485517e-06, "loss": 0.8528, "step": 9400 }, { "epoch": 0.33206027665802756, "grad_norm": 1.4717662334442139, "learning_rate": 7.792369950962747e-06, "loss": 0.8152, "step": 9401 }, { "epoch": 0.3320955984617355, "grad_norm": 1.702790379524231, "learning_rate": 7.791895437893962e-06, "loss": 0.8366, "step": 9402 }, { "epoch": 0.33213092026544333, "grad_norm": 1.556546688079834, "learning_rate": 7.791420888285374e-06, "loss": 0.8072, "step": 9403 }, { "epoch": 0.33216624206915124, "grad_norm": 1.9890873432159424, "learning_rate": 7.790946302143194e-06, "loss": 0.8227, "step": 9404 }, { "epoch": 0.33220156387285915, "grad_norm": 1.7105944156646729, "learning_rate": 7.790471679473635e-06, "loss": 0.8274, "step": 9405 }, { "epoch": 0.33223688567656706, "grad_norm": 1.6322522163391113, "learning_rate": 7.789997020282907e-06, "loss": 0.823, "step": 9406 }, { "epoch": 0.332272207480275, "grad_norm": 1.498918890953064, "learning_rate": 7.789522324577222e-06, "loss": 0.8127, "step": 9407 }, { "epoch": 0.3323075292839829, "grad_norm": 1.8863890171051025, "learning_rate": 7.789047592362793e-06, "loss": 0.788, "step": 9408 }, { "epoch": 0.3323428510876908, "grad_norm": 1.7625106573104858, "learning_rate": 7.788572823645834e-06, "loss": 0.8371, "step": 9409 }, { "epoch": 0.3323781728913987, "grad_norm": 1.667339563369751, "learning_rate": 7.788098018432557e-06, "loss": 0.8129, "step": 9410 }, { "epoch": 0.3324134946951066, "grad_norm": 2.02193284034729, "learning_rate": 7.787623176729178e-06, "loss": 0.7913, "step": 9411 }, { "epoch": 0.3324488164988145, "grad_norm": 1.637965202331543, "learning_rate": 7.78714829854191e-06, "loss": 0.8248, "step": 9412 }, { "epoch": 0.33248413830252244, "grad_norm": 1.7535914182662964, "learning_rate": 7.78667338387697e-06, "loss": 0.7969, "step": 9413 }, { "epoch": 0.33251946010623035, "grad_norm": 1.7165026664733887, "learning_rate": 7.786198432740572e-06, "loss": 0.8406, "step": 9414 }, { "epoch": 0.33255478190993826, "grad_norm": 1.7603445053100586, "learning_rate": 7.785723445138935e-06, "loss": 0.8365, "step": 9415 }, { "epoch": 0.3325901037136461, "grad_norm": 1.6919969320297241, "learning_rate": 7.785248421078272e-06, "loss": 0.8149, "step": 9416 }, { "epoch": 0.332625425517354, "grad_norm": 1.6458775997161865, "learning_rate": 7.784773360564799e-06, "loss": 0.8119, "step": 9417 }, { "epoch": 0.33266074732106193, "grad_norm": 1.5457231998443604, "learning_rate": 7.784298263604738e-06, "loss": 0.7813, "step": 9418 }, { "epoch": 0.33269606912476984, "grad_norm": 1.5482784509658813, "learning_rate": 7.783823130204302e-06, "loss": 0.8278, "step": 9419 }, { "epoch": 0.33273139092847775, "grad_norm": 1.7553032636642456, "learning_rate": 7.783347960369714e-06, "loss": 0.825, "step": 9420 }, { "epoch": 0.33276671273218567, "grad_norm": 1.6270555257797241, "learning_rate": 7.782872754107192e-06, "loss": 0.8455, "step": 9421 }, { "epoch": 0.3328020345358936, "grad_norm": 0.9323490858078003, "learning_rate": 7.782397511422953e-06, "loss": 0.5983, "step": 9422 }, { "epoch": 0.3328373563396015, "grad_norm": 1.7950737476348877, "learning_rate": 7.781922232323219e-06, "loss": 0.8434, "step": 9423 }, { "epoch": 0.3328726781433094, "grad_norm": 1.667238473892212, "learning_rate": 7.78144691681421e-06, "loss": 0.8267, "step": 9424 }, { "epoch": 0.3329079999470173, "grad_norm": 1.7919034957885742, "learning_rate": 7.780971564902147e-06, "loss": 0.867, "step": 9425 }, { "epoch": 0.3329433217507252, "grad_norm": 1.5750041007995605, "learning_rate": 7.78049617659325e-06, "loss": 0.8208, "step": 9426 }, { "epoch": 0.33297864355443313, "grad_norm": 1.637703776359558, "learning_rate": 7.780020751893742e-06, "loss": 0.8056, "step": 9427 }, { "epoch": 0.33301396535814104, "grad_norm": 1.5564252138137817, "learning_rate": 7.779545290809845e-06, "loss": 0.8164, "step": 9428 }, { "epoch": 0.3330492871618489, "grad_norm": 1.7141226530075073, "learning_rate": 7.779069793347782e-06, "loss": 0.8346, "step": 9429 }, { "epoch": 0.3330846089655568, "grad_norm": 1.6056156158447266, "learning_rate": 7.778594259513775e-06, "loss": 0.8033, "step": 9430 }, { "epoch": 0.3331199307692647, "grad_norm": 1.7609617710113525, "learning_rate": 7.77811868931405e-06, "loss": 0.8289, "step": 9431 }, { "epoch": 0.3331552525729726, "grad_norm": 1.5569247007369995, "learning_rate": 7.777643082754828e-06, "loss": 0.8333, "step": 9432 }, { "epoch": 0.33319057437668054, "grad_norm": 1.6188462972640991, "learning_rate": 7.77716743984234e-06, "loss": 0.828, "step": 9433 }, { "epoch": 0.33322589618038845, "grad_norm": 1.6283916234970093, "learning_rate": 7.776691760582801e-06, "loss": 0.7891, "step": 9434 }, { "epoch": 0.33326121798409636, "grad_norm": 1.5483418703079224, "learning_rate": 7.776216044982446e-06, "loss": 0.8022, "step": 9435 }, { "epoch": 0.33329653978780427, "grad_norm": 1.4950580596923828, "learning_rate": 7.775740293047496e-06, "loss": 0.8072, "step": 9436 }, { "epoch": 0.3333318615915122, "grad_norm": 1.587689995765686, "learning_rate": 7.775264504784177e-06, "loss": 0.8405, "step": 9437 }, { "epoch": 0.3333671833952201, "grad_norm": 1.6203277111053467, "learning_rate": 7.77478868019872e-06, "loss": 0.8314, "step": 9438 }, { "epoch": 0.333402505198928, "grad_norm": 1.8211638927459717, "learning_rate": 7.774312819297349e-06, "loss": 0.8246, "step": 9439 }, { "epoch": 0.3334378270026359, "grad_norm": 2.068699836730957, "learning_rate": 7.773836922086296e-06, "loss": 0.8133, "step": 9440 }, { "epoch": 0.3334731488063438, "grad_norm": 1.821012258529663, "learning_rate": 7.773360988571783e-06, "loss": 0.8266, "step": 9441 }, { "epoch": 0.3335084706100517, "grad_norm": 1.750658392906189, "learning_rate": 7.772885018760045e-06, "loss": 0.8191, "step": 9442 }, { "epoch": 0.3335437924137596, "grad_norm": 1.6808066368103027, "learning_rate": 7.77240901265731e-06, "loss": 0.8304, "step": 9443 }, { "epoch": 0.3335791142174675, "grad_norm": 1.6203099489212036, "learning_rate": 7.771932970269805e-06, "loss": 0.8127, "step": 9444 }, { "epoch": 0.3336144360211754, "grad_norm": 1.6563507318496704, "learning_rate": 7.771456891603763e-06, "loss": 0.8504, "step": 9445 }, { "epoch": 0.3336497578248833, "grad_norm": 1.9328573942184448, "learning_rate": 7.770980776665415e-06, "loss": 0.853, "step": 9446 }, { "epoch": 0.33368507962859123, "grad_norm": 1.709121584892273, "learning_rate": 7.77050462546099e-06, "loss": 0.812, "step": 9447 }, { "epoch": 0.33372040143229914, "grad_norm": 1.5556772947311401, "learning_rate": 7.770028437996722e-06, "loss": 0.8135, "step": 9448 }, { "epoch": 0.33375572323600705, "grad_norm": 1.4915931224822998, "learning_rate": 7.769552214278842e-06, "loss": 0.8436, "step": 9449 }, { "epoch": 0.33379104503971496, "grad_norm": 1.6271476745605469, "learning_rate": 7.769075954313585e-06, "loss": 0.7987, "step": 9450 }, { "epoch": 0.3338263668434229, "grad_norm": 1.639559268951416, "learning_rate": 7.768599658107181e-06, "loss": 0.8383, "step": 9451 }, { "epoch": 0.3338616886471308, "grad_norm": 1.6271109580993652, "learning_rate": 7.768123325665866e-06, "loss": 0.799, "step": 9452 }, { "epoch": 0.3338970104508387, "grad_norm": 2.0014147758483887, "learning_rate": 7.767646956995872e-06, "loss": 0.8247, "step": 9453 }, { "epoch": 0.3339323322545466, "grad_norm": 1.6091508865356445, "learning_rate": 7.767170552103436e-06, "loss": 0.8418, "step": 9454 }, { "epoch": 0.33396765405825446, "grad_norm": 1.8971534967422485, "learning_rate": 7.76669411099479e-06, "loss": 0.8712, "step": 9455 }, { "epoch": 0.33400297586196237, "grad_norm": 2.072700023651123, "learning_rate": 7.766217633676175e-06, "loss": 0.8453, "step": 9456 }, { "epoch": 0.3340382976656703, "grad_norm": 1.7029221057891846, "learning_rate": 7.76574112015382e-06, "loss": 0.7973, "step": 9457 }, { "epoch": 0.3340736194693782, "grad_norm": 1.5527923107147217, "learning_rate": 7.765264570433967e-06, "loss": 0.8118, "step": 9458 }, { "epoch": 0.3341089412730861, "grad_norm": 1.5387074947357178, "learning_rate": 7.76478798452285e-06, "loss": 0.8018, "step": 9459 }, { "epoch": 0.334144263076794, "grad_norm": 1.7770131826400757, "learning_rate": 7.76431136242671e-06, "loss": 0.8211, "step": 9460 }, { "epoch": 0.3341795848805019, "grad_norm": 1.7679957151412964, "learning_rate": 7.763834704151782e-06, "loss": 0.8225, "step": 9461 }, { "epoch": 0.33421490668420983, "grad_norm": 1.634665608406067, "learning_rate": 7.763358009704303e-06, "loss": 0.8203, "step": 9462 }, { "epoch": 0.33425022848791774, "grad_norm": 8.070907592773438, "learning_rate": 7.762881279090515e-06, "loss": 0.847, "step": 9463 }, { "epoch": 0.33428555029162565, "grad_norm": 1.5390156507492065, "learning_rate": 7.762404512316655e-06, "loss": 0.8126, "step": 9464 }, { "epoch": 0.33432087209533357, "grad_norm": 1.7071682214736938, "learning_rate": 7.761927709388965e-06, "loss": 0.7947, "step": 9465 }, { "epoch": 0.3343561938990415, "grad_norm": 1.531803846359253, "learning_rate": 7.761450870313685e-06, "loss": 0.8101, "step": 9466 }, { "epoch": 0.3343915157027494, "grad_norm": 1.5296025276184082, "learning_rate": 7.760973995097054e-06, "loss": 0.8071, "step": 9467 }, { "epoch": 0.33442683750645724, "grad_norm": 1.8088210821151733, "learning_rate": 7.760497083745315e-06, "loss": 0.8469, "step": 9468 }, { "epoch": 0.33446215931016515, "grad_norm": 1.6840509176254272, "learning_rate": 7.760020136264708e-06, "loss": 0.863, "step": 9469 }, { "epoch": 0.33449748111387306, "grad_norm": 1.5742024183273315, "learning_rate": 7.759543152661478e-06, "loss": 0.7951, "step": 9470 }, { "epoch": 0.334532802917581, "grad_norm": 1.0893431901931763, "learning_rate": 7.759066132941865e-06, "loss": 0.6328, "step": 9471 }, { "epoch": 0.3345681247212889, "grad_norm": 2.0455188751220703, "learning_rate": 7.758589077112113e-06, "loss": 0.8354, "step": 9472 }, { "epoch": 0.3346034465249968, "grad_norm": 1.9569482803344727, "learning_rate": 7.758111985178467e-06, "loss": 0.8063, "step": 9473 }, { "epoch": 0.3346387683287047, "grad_norm": 1.7347921133041382, "learning_rate": 7.757634857147168e-06, "loss": 0.8086, "step": 9474 }, { "epoch": 0.3346740901324126, "grad_norm": 1.6946077346801758, "learning_rate": 7.757157693024461e-06, "loss": 0.8405, "step": 9475 }, { "epoch": 0.3347094119361205, "grad_norm": 2.2193803787231445, "learning_rate": 7.756680492816595e-06, "loss": 0.84, "step": 9476 }, { "epoch": 0.33474473373982844, "grad_norm": 1.9447388648986816, "learning_rate": 7.756203256529814e-06, "loss": 0.8485, "step": 9477 }, { "epoch": 0.33478005554353635, "grad_norm": 1.9221374988555908, "learning_rate": 7.75572598417036e-06, "loss": 0.8291, "step": 9478 }, { "epoch": 0.33481537734724426, "grad_norm": 1.9219505786895752, "learning_rate": 7.755248675744484e-06, "loss": 0.8464, "step": 9479 }, { "epoch": 0.33485069915095217, "grad_norm": 1.6724435091018677, "learning_rate": 7.754771331258432e-06, "loss": 0.8271, "step": 9480 }, { "epoch": 0.33488602095466, "grad_norm": 2.1012792587280273, "learning_rate": 7.754293950718449e-06, "loss": 0.8332, "step": 9481 }, { "epoch": 0.33492134275836793, "grad_norm": 1.8955068588256836, "learning_rate": 7.753816534130785e-06, "loss": 0.8139, "step": 9482 }, { "epoch": 0.33495666456207585, "grad_norm": 1.6633851528167725, "learning_rate": 7.753339081501687e-06, "loss": 0.8292, "step": 9483 }, { "epoch": 0.33499198636578376, "grad_norm": 1.6005405187606812, "learning_rate": 7.752861592837406e-06, "loss": 0.817, "step": 9484 }, { "epoch": 0.33502730816949167, "grad_norm": 2.05629825592041, "learning_rate": 7.75238406814419e-06, "loss": 0.8416, "step": 9485 }, { "epoch": 0.3350626299731996, "grad_norm": 2.0192196369171143, "learning_rate": 7.751906507428288e-06, "loss": 0.8509, "step": 9486 }, { "epoch": 0.3350979517769075, "grad_norm": 1.8991330862045288, "learning_rate": 7.75142891069595e-06, "loss": 0.8523, "step": 9487 }, { "epoch": 0.3351332735806154, "grad_norm": 1.6887015104293823, "learning_rate": 7.75095127795343e-06, "loss": 0.7957, "step": 9488 }, { "epoch": 0.3351685953843233, "grad_norm": 1.682120442390442, "learning_rate": 7.750473609206976e-06, "loss": 0.7934, "step": 9489 }, { "epoch": 0.3352039171880312, "grad_norm": 2.002594470977783, "learning_rate": 7.74999590446284e-06, "loss": 0.8237, "step": 9490 }, { "epoch": 0.33523923899173913, "grad_norm": 2.023707866668701, "learning_rate": 7.749518163727274e-06, "loss": 0.8216, "step": 9491 }, { "epoch": 0.33527456079544704, "grad_norm": 2.324169158935547, "learning_rate": 7.749040387006532e-06, "loss": 0.8332, "step": 9492 }, { "epoch": 0.33530988259915495, "grad_norm": 1.7306312322616577, "learning_rate": 7.748562574306866e-06, "loss": 0.8007, "step": 9493 }, { "epoch": 0.3353452044028628, "grad_norm": 1.6033530235290527, "learning_rate": 7.74808472563453e-06, "loss": 0.8145, "step": 9494 }, { "epoch": 0.3353805262065707, "grad_norm": 1.8958122730255127, "learning_rate": 7.747606840995776e-06, "loss": 0.844, "step": 9495 }, { "epoch": 0.3354158480102786, "grad_norm": 1.7120031118392944, "learning_rate": 7.747128920396862e-06, "loss": 0.8157, "step": 9496 }, { "epoch": 0.33545116981398654, "grad_norm": 1.52885901927948, "learning_rate": 7.74665096384404e-06, "loss": 0.7887, "step": 9497 }, { "epoch": 0.33548649161769445, "grad_norm": 1.6980878114700317, "learning_rate": 7.74617297134357e-06, "loss": 0.8188, "step": 9498 }, { "epoch": 0.33552181342140236, "grad_norm": 1.6782859563827515, "learning_rate": 7.7456949429017e-06, "loss": 0.8323, "step": 9499 }, { "epoch": 0.33555713522511027, "grad_norm": 1.5868514776229858, "learning_rate": 7.74521687852469e-06, "loss": 0.8012, "step": 9500 }, { "epoch": 0.3355924570288182, "grad_norm": 1.661062240600586, "learning_rate": 7.744738778218801e-06, "loss": 0.8099, "step": 9501 }, { "epoch": 0.3356277788325261, "grad_norm": 1.6745635271072388, "learning_rate": 7.744260641990286e-06, "loss": 0.8123, "step": 9502 }, { "epoch": 0.335663100636234, "grad_norm": 2.1627767086029053, "learning_rate": 7.743782469845403e-06, "loss": 0.8211, "step": 9503 }, { "epoch": 0.3356984224399419, "grad_norm": 1.6758087873458862, "learning_rate": 7.74330426179041e-06, "loss": 0.8197, "step": 9504 }, { "epoch": 0.3357337442436498, "grad_norm": 1.6576088666915894, "learning_rate": 7.742826017831567e-06, "loss": 0.8198, "step": 9505 }, { "epoch": 0.33576906604735773, "grad_norm": 1.8659454584121704, "learning_rate": 7.742347737975133e-06, "loss": 0.8296, "step": 9506 }, { "epoch": 0.3358043878510656, "grad_norm": 2.1759274005889893, "learning_rate": 7.741869422227369e-06, "loss": 0.8429, "step": 9507 }, { "epoch": 0.3358397096547735, "grad_norm": 1.7933013439178467, "learning_rate": 7.74139107059453e-06, "loss": 0.7897, "step": 9508 }, { "epoch": 0.3358750314584814, "grad_norm": 1.7999283075332642, "learning_rate": 7.740912683082883e-06, "loss": 0.8046, "step": 9509 }, { "epoch": 0.3359103532621893, "grad_norm": 1.5823214054107666, "learning_rate": 7.740434259698683e-06, "loss": 0.8375, "step": 9510 }, { "epoch": 0.33594567506589723, "grad_norm": 2.7892355918884277, "learning_rate": 7.739955800448198e-06, "loss": 0.8228, "step": 9511 }, { "epoch": 0.33598099686960514, "grad_norm": 2.062843084335327, "learning_rate": 7.739477305337684e-06, "loss": 0.8513, "step": 9512 }, { "epoch": 0.33601631867331305, "grad_norm": 1.8485114574432373, "learning_rate": 7.738998774373407e-06, "loss": 0.804, "step": 9513 }, { "epoch": 0.33605164047702096, "grad_norm": 1.706856608390808, "learning_rate": 7.73852020756163e-06, "loss": 0.8268, "step": 9514 }, { "epoch": 0.3360869622807289, "grad_norm": 1.5642480850219727, "learning_rate": 7.738041604908613e-06, "loss": 0.8075, "step": 9515 }, { "epoch": 0.3361222840844368, "grad_norm": 1.741451621055603, "learning_rate": 7.737562966420625e-06, "loss": 0.8168, "step": 9516 }, { "epoch": 0.3361576058881447, "grad_norm": 1.5981574058532715, "learning_rate": 7.737084292103925e-06, "loss": 0.779, "step": 9517 }, { "epoch": 0.3361929276918526, "grad_norm": 1.530453085899353, "learning_rate": 7.736605581964781e-06, "loss": 0.8008, "step": 9518 }, { "epoch": 0.3362282494955605, "grad_norm": 2.1593964099884033, "learning_rate": 7.736126836009457e-06, "loss": 0.8867, "step": 9519 }, { "epoch": 0.33626357129926837, "grad_norm": 1.6840699911117554, "learning_rate": 7.735648054244221e-06, "loss": 0.8252, "step": 9520 }, { "epoch": 0.3362988931029763, "grad_norm": 1.5996145009994507, "learning_rate": 7.735169236675336e-06, "loss": 0.7971, "step": 9521 }, { "epoch": 0.3363342149066842, "grad_norm": 1.5992412567138672, "learning_rate": 7.73469038330907e-06, "loss": 0.7734, "step": 9522 }, { "epoch": 0.3363695367103921, "grad_norm": 1.7170159816741943, "learning_rate": 7.734211494151691e-06, "loss": 0.832, "step": 9523 }, { "epoch": 0.3364048585141, "grad_norm": 1.6005467176437378, "learning_rate": 7.733732569209467e-06, "loss": 0.8078, "step": 9524 }, { "epoch": 0.3364401803178079, "grad_norm": 1.508824110031128, "learning_rate": 7.733253608488663e-06, "loss": 0.7891, "step": 9525 }, { "epoch": 0.33647550212151583, "grad_norm": 1.6600972414016724, "learning_rate": 7.732774611995552e-06, "loss": 0.811, "step": 9526 }, { "epoch": 0.33651082392522375, "grad_norm": 1.7946194410324097, "learning_rate": 7.732295579736398e-06, "loss": 0.7781, "step": 9527 }, { "epoch": 0.33654614572893166, "grad_norm": 1.590970754623413, "learning_rate": 7.731816511717476e-06, "loss": 0.8429, "step": 9528 }, { "epoch": 0.33658146753263957, "grad_norm": 0.9739090204238892, "learning_rate": 7.73133740794505e-06, "loss": 0.5957, "step": 9529 }, { "epoch": 0.3366167893363475, "grad_norm": 1.8200901746749878, "learning_rate": 7.730858268425394e-06, "loss": 0.8322, "step": 9530 }, { "epoch": 0.3366521111400554, "grad_norm": 1.6543619632720947, "learning_rate": 7.73037909316478e-06, "loss": 0.852, "step": 9531 }, { "epoch": 0.3366874329437633, "grad_norm": 1.6465715169906616, "learning_rate": 7.729899882169476e-06, "loss": 0.8579, "step": 9532 }, { "epoch": 0.33672275474747115, "grad_norm": 1.7583391666412354, "learning_rate": 7.729420635445756e-06, "loss": 0.8281, "step": 9533 }, { "epoch": 0.33675807655117906, "grad_norm": 1.58531653881073, "learning_rate": 7.728941352999892e-06, "loss": 0.8051, "step": 9534 }, { "epoch": 0.336793398354887, "grad_norm": 1.5335884094238281, "learning_rate": 7.728462034838157e-06, "loss": 0.8061, "step": 9535 }, { "epoch": 0.3368287201585949, "grad_norm": 1.522310495376587, "learning_rate": 7.727982680966824e-06, "loss": 0.7998, "step": 9536 }, { "epoch": 0.3368640419623028, "grad_norm": 1.7455002069473267, "learning_rate": 7.727503291392165e-06, "loss": 0.8566, "step": 9537 }, { "epoch": 0.3368993637660107, "grad_norm": 1.6944032907485962, "learning_rate": 7.727023866120456e-06, "loss": 0.8205, "step": 9538 }, { "epoch": 0.3369346855697186, "grad_norm": 1.6986942291259766, "learning_rate": 7.726544405157971e-06, "loss": 0.8228, "step": 9539 }, { "epoch": 0.33697000737342653, "grad_norm": 1.8183209896087646, "learning_rate": 7.726064908510987e-06, "loss": 0.8358, "step": 9540 }, { "epoch": 0.33700532917713444, "grad_norm": 1.7543525695800781, "learning_rate": 7.725585376185777e-06, "loss": 0.8205, "step": 9541 }, { "epoch": 0.33704065098084235, "grad_norm": 1.5949138402938843, "learning_rate": 7.725105808188618e-06, "loss": 0.7959, "step": 9542 }, { "epoch": 0.33707597278455026, "grad_norm": 1.6051474809646606, "learning_rate": 7.724626204525785e-06, "loss": 0.8215, "step": 9543 }, { "epoch": 0.33711129458825817, "grad_norm": 1.863824486732483, "learning_rate": 7.724146565203558e-06, "loss": 0.8172, "step": 9544 }, { "epoch": 0.3371466163919661, "grad_norm": 1.787712812423706, "learning_rate": 7.723666890228215e-06, "loss": 0.801, "step": 9545 }, { "epoch": 0.33718193819567394, "grad_norm": 1.6804916858673096, "learning_rate": 7.723187179606029e-06, "loss": 0.7991, "step": 9546 }, { "epoch": 0.33721725999938185, "grad_norm": 1.6848084926605225, "learning_rate": 7.72270743334328e-06, "loss": 0.7927, "step": 9547 }, { "epoch": 0.33725258180308976, "grad_norm": 2.0865964889526367, "learning_rate": 7.72222765144625e-06, "loss": 0.8603, "step": 9548 }, { "epoch": 0.33728790360679767, "grad_norm": 1.4891905784606934, "learning_rate": 7.721747833921216e-06, "loss": 0.8015, "step": 9549 }, { "epoch": 0.3373232254105056, "grad_norm": 1.682511568069458, "learning_rate": 7.721267980774455e-06, "loss": 0.8196, "step": 9550 }, { "epoch": 0.3373585472142135, "grad_norm": 1.651731252670288, "learning_rate": 7.720788092012255e-06, "loss": 0.8204, "step": 9551 }, { "epoch": 0.3373938690179214, "grad_norm": 1.732926845550537, "learning_rate": 7.720308167640888e-06, "loss": 0.8186, "step": 9552 }, { "epoch": 0.3374291908216293, "grad_norm": 1.5782225131988525, "learning_rate": 7.719828207666643e-06, "loss": 0.8388, "step": 9553 }, { "epoch": 0.3374645126253372, "grad_norm": 1.6831674575805664, "learning_rate": 7.719348212095793e-06, "loss": 0.8328, "step": 9554 }, { "epoch": 0.33749983442904513, "grad_norm": 1.646437168121338, "learning_rate": 7.718868180934629e-06, "loss": 0.8194, "step": 9555 }, { "epoch": 0.33753515623275304, "grad_norm": 1.8226577043533325, "learning_rate": 7.718388114189427e-06, "loss": 0.8274, "step": 9556 }, { "epoch": 0.33757047803646095, "grad_norm": 1.5904110670089722, "learning_rate": 7.717908011866472e-06, "loss": 0.8081, "step": 9557 }, { "epoch": 0.33760579984016886, "grad_norm": 1.7206556797027588, "learning_rate": 7.71742787397205e-06, "loss": 0.8328, "step": 9558 }, { "epoch": 0.3376411216438767, "grad_norm": 1.743177890777588, "learning_rate": 7.71694770051244e-06, "loss": 0.8269, "step": 9559 }, { "epoch": 0.33767644344758463, "grad_norm": 1.634220838546753, "learning_rate": 7.716467491493932e-06, "loss": 0.8171, "step": 9560 }, { "epoch": 0.33771176525129254, "grad_norm": 1.6995012760162354, "learning_rate": 7.715987246922808e-06, "loss": 0.8356, "step": 9561 }, { "epoch": 0.33774708705500045, "grad_norm": 1.6604628562927246, "learning_rate": 7.715506966805351e-06, "loss": 0.7879, "step": 9562 }, { "epoch": 0.33778240885870836, "grad_norm": 1.9255599975585938, "learning_rate": 7.715026651147851e-06, "loss": 0.8476, "step": 9563 }, { "epoch": 0.33781773066241627, "grad_norm": 1.6600006818771362, "learning_rate": 7.714546299956592e-06, "loss": 0.8031, "step": 9564 }, { "epoch": 0.3378530524661242, "grad_norm": 1.9339549541473389, "learning_rate": 7.714065913237863e-06, "loss": 0.8487, "step": 9565 }, { "epoch": 0.3378883742698321, "grad_norm": 1.7721831798553467, "learning_rate": 7.713585490997946e-06, "loss": 0.8231, "step": 9566 }, { "epoch": 0.33792369607354, "grad_norm": 1.9655582904815674, "learning_rate": 7.713105033243136e-06, "loss": 0.8393, "step": 9567 }, { "epoch": 0.3379590178772479, "grad_norm": 1.7669399976730347, "learning_rate": 7.712624539979715e-06, "loss": 0.8076, "step": 9568 }, { "epoch": 0.3379943396809558, "grad_norm": 1.9308574199676514, "learning_rate": 7.712144011213973e-06, "loss": 0.821, "step": 9569 }, { "epoch": 0.33802966148466373, "grad_norm": 1.8194737434387207, "learning_rate": 7.711663446952203e-06, "loss": 0.8239, "step": 9570 }, { "epoch": 0.33806498328837165, "grad_norm": 1.684478998184204, "learning_rate": 7.71118284720069e-06, "loss": 0.8426, "step": 9571 }, { "epoch": 0.3381003050920795, "grad_norm": 1.655578851699829, "learning_rate": 7.710702211965725e-06, "loss": 0.7998, "step": 9572 }, { "epoch": 0.3381356268957874, "grad_norm": 2.5251526832580566, "learning_rate": 7.7102215412536e-06, "loss": 0.8193, "step": 9573 }, { "epoch": 0.3381709486994953, "grad_norm": 1.7489367723464966, "learning_rate": 7.709740835070605e-06, "loss": 0.8206, "step": 9574 }, { "epoch": 0.33820627050320323, "grad_norm": 2.039454936981201, "learning_rate": 7.70926009342303e-06, "loss": 0.8377, "step": 9575 }, { "epoch": 0.33824159230691114, "grad_norm": 1.7644736766815186, "learning_rate": 7.70877931631717e-06, "loss": 0.8216, "step": 9576 }, { "epoch": 0.33827691411061905, "grad_norm": 1.594677448272705, "learning_rate": 7.708298503759313e-06, "loss": 0.8464, "step": 9577 }, { "epoch": 0.33831223591432696, "grad_norm": 1.7229623794555664, "learning_rate": 7.707817655755756e-06, "loss": 0.8227, "step": 9578 }, { "epoch": 0.3383475577180349, "grad_norm": 2.112090826034546, "learning_rate": 7.70733677231279e-06, "loss": 0.8587, "step": 9579 }, { "epoch": 0.3383828795217428, "grad_norm": 1.783632755279541, "learning_rate": 7.70685585343671e-06, "loss": 0.825, "step": 9580 }, { "epoch": 0.3384182013254507, "grad_norm": 1.6183754205703735, "learning_rate": 7.70637489913381e-06, "loss": 0.8281, "step": 9581 }, { "epoch": 0.3384535231291586, "grad_norm": 1.8134615421295166, "learning_rate": 7.705893909410382e-06, "loss": 0.8301, "step": 9582 }, { "epoch": 0.3384888449328665, "grad_norm": 1.5998200178146362, "learning_rate": 7.705412884272726e-06, "loss": 0.787, "step": 9583 }, { "epoch": 0.33852416673657443, "grad_norm": 1.5482556819915771, "learning_rate": 7.704931823727132e-06, "loss": 0.8114, "step": 9584 }, { "epoch": 0.3385594885402823, "grad_norm": 1.8416534662246704, "learning_rate": 7.704450727779899e-06, "loss": 0.8185, "step": 9585 }, { "epoch": 0.3385948103439902, "grad_norm": 1.669098138809204, "learning_rate": 7.703969596437324e-06, "loss": 0.826, "step": 9586 }, { "epoch": 0.3386301321476981, "grad_norm": 1.714348554611206, "learning_rate": 7.703488429705703e-06, "loss": 0.845, "step": 9587 }, { "epoch": 0.338665453951406, "grad_norm": 1.7890350818634033, "learning_rate": 7.703007227591336e-06, "loss": 0.8107, "step": 9588 }, { "epoch": 0.3387007757551139, "grad_norm": 1.7772002220153809, "learning_rate": 7.702525990100515e-06, "loss": 0.841, "step": 9589 }, { "epoch": 0.33873609755882184, "grad_norm": 1.05138099193573, "learning_rate": 7.702044717239543e-06, "loss": 0.5982, "step": 9590 }, { "epoch": 0.33877141936252975, "grad_norm": 0.9220749139785767, "learning_rate": 7.701563409014719e-06, "loss": 0.5775, "step": 9591 }, { "epoch": 0.33880674116623766, "grad_norm": 1.9106587171554565, "learning_rate": 7.701082065432339e-06, "loss": 0.8571, "step": 9592 }, { "epoch": 0.33884206296994557, "grad_norm": 1.8496633768081665, "learning_rate": 7.700600686498705e-06, "loss": 0.839, "step": 9593 }, { "epoch": 0.3388773847736535, "grad_norm": 1.6780239343643188, "learning_rate": 7.700119272220119e-06, "loss": 0.8262, "step": 9594 }, { "epoch": 0.3389127065773614, "grad_norm": 1.816721796989441, "learning_rate": 7.699637822602877e-06, "loss": 0.8205, "step": 9595 }, { "epoch": 0.3389480283810693, "grad_norm": 1.7679438591003418, "learning_rate": 7.699156337653285e-06, "loss": 0.8259, "step": 9596 }, { "epoch": 0.3389833501847772, "grad_norm": 1.6391061544418335, "learning_rate": 7.69867481737764e-06, "loss": 0.817, "step": 9597 }, { "epoch": 0.33901867198848507, "grad_norm": 1.5987708568572998, "learning_rate": 7.698193261782245e-06, "loss": 0.7996, "step": 9598 }, { "epoch": 0.339053993792193, "grad_norm": 1.6526910066604614, "learning_rate": 7.697711670873407e-06, "loss": 0.8197, "step": 9599 }, { "epoch": 0.3390893155959009, "grad_norm": 1.6934963464736938, "learning_rate": 7.697230044657424e-06, "loss": 0.8057, "step": 9600 }, { "epoch": 0.3391246373996088, "grad_norm": 1.8229306936264038, "learning_rate": 7.696748383140603e-06, "loss": 0.7969, "step": 9601 }, { "epoch": 0.3391599592033167, "grad_norm": 1.6870108842849731, "learning_rate": 7.696266686329245e-06, "loss": 0.8271, "step": 9602 }, { "epoch": 0.3391952810070246, "grad_norm": 1.251813530921936, "learning_rate": 7.695784954229656e-06, "loss": 0.6329, "step": 9603 }, { "epoch": 0.33923060281073253, "grad_norm": 1.96059250831604, "learning_rate": 7.695303186848138e-06, "loss": 0.8465, "step": 9604 }, { "epoch": 0.33926592461444044, "grad_norm": 1.889332890510559, "learning_rate": 7.694821384191001e-06, "loss": 0.8131, "step": 9605 }, { "epoch": 0.33930124641814835, "grad_norm": 1.6191728115081787, "learning_rate": 7.694339546264546e-06, "loss": 0.8347, "step": 9606 }, { "epoch": 0.33933656822185626, "grad_norm": 1.510019063949585, "learning_rate": 7.693857673075085e-06, "loss": 0.8026, "step": 9607 }, { "epoch": 0.33937189002556417, "grad_norm": 0.930341899394989, "learning_rate": 7.693375764628918e-06, "loss": 0.5734, "step": 9608 }, { "epoch": 0.3394072118292721, "grad_norm": 1.8756959438323975, "learning_rate": 7.692893820932358e-06, "loss": 0.8298, "step": 9609 }, { "epoch": 0.33944253363298, "grad_norm": 1.7569257020950317, "learning_rate": 7.692411841991708e-06, "loss": 0.8672, "step": 9610 }, { "epoch": 0.33947785543668785, "grad_norm": 2.0518786907196045, "learning_rate": 7.691929827813278e-06, "loss": 0.8748, "step": 9611 }, { "epoch": 0.33951317724039576, "grad_norm": 1.8358309268951416, "learning_rate": 7.691447778403377e-06, "loss": 0.8281, "step": 9612 }, { "epoch": 0.33954849904410367, "grad_norm": 1.7042946815490723, "learning_rate": 7.690965693768313e-06, "loss": 0.8043, "step": 9613 }, { "epoch": 0.3395838208478116, "grad_norm": 1.6691349744796753, "learning_rate": 7.690483573914396e-06, "loss": 0.8165, "step": 9614 }, { "epoch": 0.3396191426515195, "grad_norm": 1.7292214632034302, "learning_rate": 7.690001418847935e-06, "loss": 0.7921, "step": 9615 }, { "epoch": 0.3396544644552274, "grad_norm": 1.643799066543579, "learning_rate": 7.689519228575243e-06, "loss": 0.8136, "step": 9616 }, { "epoch": 0.3396897862589353, "grad_norm": 1.9094598293304443, "learning_rate": 7.689037003102626e-06, "loss": 0.843, "step": 9617 }, { "epoch": 0.3397251080626432, "grad_norm": 1.8048102855682373, "learning_rate": 7.688554742436403e-06, "loss": 0.8216, "step": 9618 }, { "epoch": 0.33976042986635113, "grad_norm": 1.6290690898895264, "learning_rate": 7.688072446582877e-06, "loss": 0.8192, "step": 9619 }, { "epoch": 0.33979575167005904, "grad_norm": 1.610777735710144, "learning_rate": 7.687590115548364e-06, "loss": 0.8152, "step": 9620 }, { "epoch": 0.33983107347376695, "grad_norm": 1.6780381202697754, "learning_rate": 7.687107749339178e-06, "loss": 0.815, "step": 9621 }, { "epoch": 0.33986639527747486, "grad_norm": 1.6396079063415527, "learning_rate": 7.68662534796163e-06, "loss": 0.8355, "step": 9622 }, { "epoch": 0.3399017170811828, "grad_norm": 1.7985800504684448, "learning_rate": 7.686142911422036e-06, "loss": 0.8332, "step": 9623 }, { "epoch": 0.33993703888489063, "grad_norm": 1.5416384935379028, "learning_rate": 7.685660439726706e-06, "loss": 0.8078, "step": 9624 }, { "epoch": 0.33997236068859854, "grad_norm": 1.5548977851867676, "learning_rate": 7.68517793288196e-06, "loss": 0.8108, "step": 9625 }, { "epoch": 0.34000768249230645, "grad_norm": 1.559895634651184, "learning_rate": 7.684695390894108e-06, "loss": 0.8181, "step": 9626 }, { "epoch": 0.34004300429601436, "grad_norm": 1.8005716800689697, "learning_rate": 7.68421281376947e-06, "loss": 0.8182, "step": 9627 }, { "epoch": 0.3400783260997223, "grad_norm": 1.6253066062927246, "learning_rate": 7.683730201514357e-06, "loss": 0.7824, "step": 9628 }, { "epoch": 0.3401136479034302, "grad_norm": 1.5715528726577759, "learning_rate": 7.683247554135086e-06, "loss": 0.8314, "step": 9629 }, { "epoch": 0.3401489697071381, "grad_norm": 1.5989826917648315, "learning_rate": 7.682764871637977e-06, "loss": 0.8214, "step": 9630 }, { "epoch": 0.340184291510846, "grad_norm": 1.705756425857544, "learning_rate": 7.682282154029348e-06, "loss": 0.7935, "step": 9631 }, { "epoch": 0.3402196133145539, "grad_norm": 1.574942708015442, "learning_rate": 7.68179940131551e-06, "loss": 0.8268, "step": 9632 }, { "epoch": 0.3402549351182618, "grad_norm": 1.656297206878662, "learning_rate": 7.68131661350279e-06, "loss": 0.7888, "step": 9633 }, { "epoch": 0.34029025692196974, "grad_norm": 1.6104875802993774, "learning_rate": 7.6808337905975e-06, "loss": 0.7904, "step": 9634 }, { "epoch": 0.34032557872567765, "grad_norm": 1.7830326557159424, "learning_rate": 7.68035093260596e-06, "loss": 0.8463, "step": 9635 }, { "epoch": 0.34036090052938556, "grad_norm": 1.6218225955963135, "learning_rate": 7.679868039534494e-06, "loss": 0.8132, "step": 9636 }, { "epoch": 0.3403962223330934, "grad_norm": 1.5707670450210571, "learning_rate": 7.679385111389415e-06, "loss": 0.8336, "step": 9637 }, { "epoch": 0.3404315441368013, "grad_norm": 1.7434192895889282, "learning_rate": 7.678902148177051e-06, "loss": 0.8415, "step": 9638 }, { "epoch": 0.34046686594050923, "grad_norm": 1.5910450220108032, "learning_rate": 7.678419149903718e-06, "loss": 0.869, "step": 9639 }, { "epoch": 0.34050218774421714, "grad_norm": 1.7917941808700562, "learning_rate": 7.677936116575737e-06, "loss": 0.7897, "step": 9640 }, { "epoch": 0.34053750954792505, "grad_norm": 1.5506062507629395, "learning_rate": 7.677453048199433e-06, "loss": 0.8287, "step": 9641 }, { "epoch": 0.34057283135163297, "grad_norm": 1.6465500593185425, "learning_rate": 7.676969944781127e-06, "loss": 0.8321, "step": 9642 }, { "epoch": 0.3406081531553409, "grad_norm": 1.5930410623550415, "learning_rate": 7.676486806327141e-06, "loss": 0.8201, "step": 9643 }, { "epoch": 0.3406434749590488, "grad_norm": 1.7612035274505615, "learning_rate": 7.6760036328438e-06, "loss": 0.8265, "step": 9644 }, { "epoch": 0.3406787967627567, "grad_norm": 1.5076237916946411, "learning_rate": 7.675520424337424e-06, "loss": 0.8143, "step": 9645 }, { "epoch": 0.3407141185664646, "grad_norm": 1.6542271375656128, "learning_rate": 7.675037180814343e-06, "loss": 0.8172, "step": 9646 }, { "epoch": 0.3407494403701725, "grad_norm": 1.4512766599655151, "learning_rate": 7.674553902280877e-06, "loss": 0.7963, "step": 9647 }, { "epoch": 0.34078476217388043, "grad_norm": 1.5653388500213623, "learning_rate": 7.67407058874335e-06, "loss": 0.8307, "step": 9648 }, { "epoch": 0.34082008397758834, "grad_norm": 1.770004391670227, "learning_rate": 7.673587240208091e-06, "loss": 0.8768, "step": 9649 }, { "epoch": 0.3408554057812962, "grad_norm": 1.7943058013916016, "learning_rate": 7.673103856681426e-06, "loss": 0.81, "step": 9650 }, { "epoch": 0.3408907275850041, "grad_norm": 1.7202414274215698, "learning_rate": 7.67262043816968e-06, "loss": 0.8145, "step": 9651 }, { "epoch": 0.340926049388712, "grad_norm": 1.636887550354004, "learning_rate": 7.67213698467918e-06, "loss": 0.825, "step": 9652 }, { "epoch": 0.3409613711924199, "grad_norm": 1.62608802318573, "learning_rate": 7.671653496216252e-06, "loss": 0.8254, "step": 9653 }, { "epoch": 0.34099669299612784, "grad_norm": 1.6752532720565796, "learning_rate": 7.671169972787225e-06, "loss": 0.8029, "step": 9654 }, { "epoch": 0.34103201479983575, "grad_norm": 1.9447323083877563, "learning_rate": 7.670686414398431e-06, "loss": 0.8446, "step": 9655 }, { "epoch": 0.34106733660354366, "grad_norm": 1.7458134889602661, "learning_rate": 7.670202821056192e-06, "loss": 0.8208, "step": 9656 }, { "epoch": 0.34110265840725157, "grad_norm": 1.4725180864334106, "learning_rate": 7.669719192766842e-06, "loss": 0.8154, "step": 9657 }, { "epoch": 0.3411379802109595, "grad_norm": 1.8219552040100098, "learning_rate": 7.669235529536708e-06, "loss": 0.8521, "step": 9658 }, { "epoch": 0.3411733020146674, "grad_norm": 1.6357698440551758, "learning_rate": 7.66875183137212e-06, "loss": 0.8463, "step": 9659 }, { "epoch": 0.3412086238183753, "grad_norm": 1.6835676431655884, "learning_rate": 7.668268098279413e-06, "loss": 0.844, "step": 9660 }, { "epoch": 0.3412439456220832, "grad_norm": 1.703380823135376, "learning_rate": 7.667784330264913e-06, "loss": 0.8395, "step": 9661 }, { "epoch": 0.3412792674257911, "grad_norm": 1.6319619417190552, "learning_rate": 7.667300527334953e-06, "loss": 0.8185, "step": 9662 }, { "epoch": 0.341314589229499, "grad_norm": 1.8475691080093384, "learning_rate": 7.666816689495866e-06, "loss": 0.8619, "step": 9663 }, { "epoch": 0.3413499110332069, "grad_norm": 1.7189998626708984, "learning_rate": 7.666332816753983e-06, "loss": 0.8491, "step": 9664 }, { "epoch": 0.3413852328369148, "grad_norm": 1.600737452507019, "learning_rate": 7.665848909115638e-06, "loss": 0.8149, "step": 9665 }, { "epoch": 0.3414205546406227, "grad_norm": 1.5843027830123901, "learning_rate": 7.665364966587161e-06, "loss": 0.8306, "step": 9666 }, { "epoch": 0.3414558764443306, "grad_norm": 1.793489694595337, "learning_rate": 7.66488098917489e-06, "loss": 0.814, "step": 9667 }, { "epoch": 0.34149119824803853, "grad_norm": 1.6223465204238892, "learning_rate": 7.664396976885158e-06, "loss": 0.8154, "step": 9668 }, { "epoch": 0.34152652005174644, "grad_norm": 1.9874939918518066, "learning_rate": 7.663912929724299e-06, "loss": 0.8146, "step": 9669 }, { "epoch": 0.34156184185545435, "grad_norm": 1.732906460762024, "learning_rate": 7.66342884769865e-06, "loss": 0.8451, "step": 9670 }, { "epoch": 0.34159716365916226, "grad_norm": 1.5255663394927979, "learning_rate": 7.662944730814541e-06, "loss": 0.8067, "step": 9671 }, { "epoch": 0.3416324854628702, "grad_norm": 1.5902942419052124, "learning_rate": 7.662460579078315e-06, "loss": 0.7819, "step": 9672 }, { "epoch": 0.3416678072665781, "grad_norm": 1.785247802734375, "learning_rate": 7.661976392496304e-06, "loss": 0.8664, "step": 9673 }, { "epoch": 0.341703129070286, "grad_norm": 1.6103291511535645, "learning_rate": 7.661492171074847e-06, "loss": 0.7891, "step": 9674 }, { "epoch": 0.3417384508739939, "grad_norm": 1.4564992189407349, "learning_rate": 7.661007914820282e-06, "loss": 0.8039, "step": 9675 }, { "epoch": 0.34177377267770176, "grad_norm": 1.5161008834838867, "learning_rate": 7.660523623738943e-06, "loss": 0.8375, "step": 9676 }, { "epoch": 0.34180909448140967, "grad_norm": 1.764840841293335, "learning_rate": 7.660039297837172e-06, "loss": 0.8165, "step": 9677 }, { "epoch": 0.3418444162851176, "grad_norm": 1.825429916381836, "learning_rate": 7.659554937121308e-06, "loss": 0.8321, "step": 9678 }, { "epoch": 0.3418797380888255, "grad_norm": 1.6300591230392456, "learning_rate": 7.659070541597687e-06, "loss": 0.8091, "step": 9679 }, { "epoch": 0.3419150598925334, "grad_norm": 1.6704946756362915, "learning_rate": 7.65858611127265e-06, "loss": 0.8334, "step": 9680 }, { "epoch": 0.3419503816962413, "grad_norm": 1.7602672576904297, "learning_rate": 7.658101646152541e-06, "loss": 0.8175, "step": 9681 }, { "epoch": 0.3419857034999492, "grad_norm": 1.5675026178359985, "learning_rate": 7.657617146243695e-06, "loss": 0.7934, "step": 9682 }, { "epoch": 0.34202102530365713, "grad_norm": 1.7941888570785522, "learning_rate": 7.657132611552456e-06, "loss": 0.7998, "step": 9683 }, { "epoch": 0.34205634710736504, "grad_norm": 1.5489140748977661, "learning_rate": 7.656648042085166e-06, "loss": 0.8263, "step": 9684 }, { "epoch": 0.34209166891107295, "grad_norm": 1.930715560913086, "learning_rate": 7.656163437848163e-06, "loss": 0.8388, "step": 9685 }, { "epoch": 0.34212699071478087, "grad_norm": 1.5475051403045654, "learning_rate": 7.655678798847794e-06, "loss": 0.796, "step": 9686 }, { "epoch": 0.3421623125184888, "grad_norm": 2.3703596591949463, "learning_rate": 7.6551941250904e-06, "loss": 0.826, "step": 9687 }, { "epoch": 0.3421976343221967, "grad_norm": 1.8013386726379395, "learning_rate": 7.654709416582324e-06, "loss": 0.8484, "step": 9688 }, { "epoch": 0.34223295612590454, "grad_norm": 1.6599589586257935, "learning_rate": 7.654224673329911e-06, "loss": 0.7922, "step": 9689 }, { "epoch": 0.34226827792961245, "grad_norm": 1.686700463294983, "learning_rate": 7.653739895339503e-06, "loss": 0.8024, "step": 9690 }, { "epoch": 0.34230359973332036, "grad_norm": 1.5702345371246338, "learning_rate": 7.653255082617448e-06, "loss": 0.8166, "step": 9691 }, { "epoch": 0.3423389215370283, "grad_norm": 1.9828367233276367, "learning_rate": 7.652770235170087e-06, "loss": 0.8295, "step": 9692 }, { "epoch": 0.3423742433407362, "grad_norm": 1.6738686561584473, "learning_rate": 7.65228535300377e-06, "loss": 0.7928, "step": 9693 }, { "epoch": 0.3424095651444441, "grad_norm": 1.7723135948181152, "learning_rate": 7.651800436124839e-06, "loss": 0.843, "step": 9694 }, { "epoch": 0.342444886948152, "grad_norm": 1.6880028247833252, "learning_rate": 7.651315484539643e-06, "loss": 0.8223, "step": 9695 }, { "epoch": 0.3424802087518599, "grad_norm": 1.5059553384780884, "learning_rate": 7.650830498254529e-06, "loss": 0.7928, "step": 9696 }, { "epoch": 0.3425155305555678, "grad_norm": 1.7323273420333862, "learning_rate": 7.650345477275844e-06, "loss": 0.8605, "step": 9697 }, { "epoch": 0.34255085235927574, "grad_norm": 1.5276880264282227, "learning_rate": 7.649860421609933e-06, "loss": 0.8178, "step": 9698 }, { "epoch": 0.34258617416298365, "grad_norm": 1.5820759534835815, "learning_rate": 7.64937533126315e-06, "loss": 0.8203, "step": 9699 }, { "epoch": 0.34262149596669156, "grad_norm": 1.7448420524597168, "learning_rate": 7.64889020624184e-06, "loss": 0.7921, "step": 9700 }, { "epoch": 0.34265681777039947, "grad_norm": 1.7926596403121948, "learning_rate": 7.648405046552353e-06, "loss": 0.8261, "step": 9701 }, { "epoch": 0.3426921395741074, "grad_norm": 0.9777313470840454, "learning_rate": 7.647919852201038e-06, "loss": 0.6041, "step": 9702 }, { "epoch": 0.34272746137781523, "grad_norm": 1.5264164209365845, "learning_rate": 7.647434623194246e-06, "loss": 0.8174, "step": 9703 }, { "epoch": 0.34276278318152315, "grad_norm": 1.533831238746643, "learning_rate": 7.646949359538327e-06, "loss": 0.8011, "step": 9704 }, { "epoch": 0.34279810498523106, "grad_norm": 1.6163338422775269, "learning_rate": 7.646464061239632e-06, "loss": 0.8196, "step": 9705 }, { "epoch": 0.34283342678893897, "grad_norm": 1.6695632934570312, "learning_rate": 7.645978728304513e-06, "loss": 0.8149, "step": 9706 }, { "epoch": 0.3428687485926469, "grad_norm": 1.7738250494003296, "learning_rate": 7.645493360739322e-06, "loss": 0.8041, "step": 9707 }, { "epoch": 0.3429040703963548, "grad_norm": 1.6314506530761719, "learning_rate": 7.645007958550413e-06, "loss": 0.857, "step": 9708 }, { "epoch": 0.3429393922000627, "grad_norm": 1.9268954992294312, "learning_rate": 7.644522521744133e-06, "loss": 0.8203, "step": 9709 }, { "epoch": 0.3429747140037706, "grad_norm": 1.6597938537597656, "learning_rate": 7.644037050326843e-06, "loss": 0.8044, "step": 9710 }, { "epoch": 0.3430100358074785, "grad_norm": 1.627739667892456, "learning_rate": 7.643551544304893e-06, "loss": 0.8317, "step": 9711 }, { "epoch": 0.34304535761118643, "grad_norm": 1.8242034912109375, "learning_rate": 7.643066003684636e-06, "loss": 0.8254, "step": 9712 }, { "epoch": 0.34308067941489434, "grad_norm": 1.7107046842575073, "learning_rate": 7.642580428472428e-06, "loss": 0.8247, "step": 9713 }, { "epoch": 0.34311600121860225, "grad_norm": 1.6682910919189453, "learning_rate": 7.642094818674624e-06, "loss": 0.8205, "step": 9714 }, { "epoch": 0.34315132302231016, "grad_norm": 1.594797134399414, "learning_rate": 7.641609174297579e-06, "loss": 0.836, "step": 9715 }, { "epoch": 0.343186644826018, "grad_norm": 1.7146440744400024, "learning_rate": 7.641123495347651e-06, "loss": 0.8144, "step": 9716 }, { "epoch": 0.3432219666297259, "grad_norm": 1.1434242725372314, "learning_rate": 7.640637781831194e-06, "loss": 0.6225, "step": 9717 }, { "epoch": 0.34325728843343384, "grad_norm": 1.7791231870651245, "learning_rate": 7.640152033754568e-06, "loss": 0.8134, "step": 9718 }, { "epoch": 0.34329261023714175, "grad_norm": 1.4554009437561035, "learning_rate": 7.639666251124126e-06, "loss": 0.8113, "step": 9719 }, { "epoch": 0.34332793204084966, "grad_norm": 1.712570071220398, "learning_rate": 7.639180433946232e-06, "loss": 0.8245, "step": 9720 }, { "epoch": 0.34336325384455757, "grad_norm": 1.7437745332717896, "learning_rate": 7.638694582227236e-06, "loss": 0.8255, "step": 9721 }, { "epoch": 0.3433985756482655, "grad_norm": 1.6152739524841309, "learning_rate": 7.638208695973504e-06, "loss": 0.8162, "step": 9722 }, { "epoch": 0.3434338974519734, "grad_norm": 1.9641048908233643, "learning_rate": 7.637722775191392e-06, "loss": 0.8531, "step": 9723 }, { "epoch": 0.3434692192556813, "grad_norm": 1.8208527565002441, "learning_rate": 7.63723681988726e-06, "loss": 0.7866, "step": 9724 }, { "epoch": 0.3435045410593892, "grad_norm": 1.7835496664047241, "learning_rate": 7.63675083006747e-06, "loss": 0.7946, "step": 9725 }, { "epoch": 0.3435398628630971, "grad_norm": 1.611750602722168, "learning_rate": 7.636264805738378e-06, "loss": 0.8033, "step": 9726 }, { "epoch": 0.34357518466680503, "grad_norm": 1.7121211290359497, "learning_rate": 7.635778746906349e-06, "loss": 0.8585, "step": 9727 }, { "epoch": 0.34361050647051294, "grad_norm": 1.6246614456176758, "learning_rate": 7.635292653577743e-06, "loss": 0.8168, "step": 9728 }, { "epoch": 0.3436458282742208, "grad_norm": 1.7813822031021118, "learning_rate": 7.634806525758924e-06, "loss": 0.8412, "step": 9729 }, { "epoch": 0.3436811500779287, "grad_norm": 1.8961191177368164, "learning_rate": 7.63432036345625e-06, "loss": 0.8346, "step": 9730 }, { "epoch": 0.3437164718816366, "grad_norm": 1.776249647140503, "learning_rate": 7.633834166676088e-06, "loss": 0.8384, "step": 9731 }, { "epoch": 0.34375179368534453, "grad_norm": 1.6342501640319824, "learning_rate": 7.633347935424798e-06, "loss": 0.7779, "step": 9732 }, { "epoch": 0.34378711548905244, "grad_norm": 1.9570159912109375, "learning_rate": 7.632861669708747e-06, "loss": 0.8234, "step": 9733 }, { "epoch": 0.34382243729276035, "grad_norm": 1.6388003826141357, "learning_rate": 7.632375369534295e-06, "loss": 0.8023, "step": 9734 }, { "epoch": 0.34385775909646826, "grad_norm": 1.7043933868408203, "learning_rate": 7.63188903490781e-06, "loss": 0.848, "step": 9735 }, { "epoch": 0.3438930809001762, "grad_norm": 1.663400411605835, "learning_rate": 7.631402665835656e-06, "loss": 0.7888, "step": 9736 }, { "epoch": 0.3439284027038841, "grad_norm": 1.6198573112487793, "learning_rate": 7.6309162623242e-06, "loss": 0.8109, "step": 9737 }, { "epoch": 0.343963724507592, "grad_norm": 1.6556549072265625, "learning_rate": 7.630429824379807e-06, "loss": 0.8288, "step": 9738 }, { "epoch": 0.3439990463112999, "grad_norm": 1.6619434356689453, "learning_rate": 7.629943352008841e-06, "loss": 0.8408, "step": 9739 }, { "epoch": 0.3440343681150078, "grad_norm": 1.7930974960327148, "learning_rate": 7.629456845217671e-06, "loss": 0.8293, "step": 9740 }, { "epoch": 0.3440696899187157, "grad_norm": 1.7643333673477173, "learning_rate": 7.628970304012664e-06, "loss": 0.8522, "step": 9741 }, { "epoch": 0.3441050117224236, "grad_norm": 1.7215585708618164, "learning_rate": 7.6284837284001885e-06, "loss": 0.8113, "step": 9742 }, { "epoch": 0.3441403335261315, "grad_norm": 1.7121801376342773, "learning_rate": 7.627997118386611e-06, "loss": 0.8166, "step": 9743 }, { "epoch": 0.3441756553298394, "grad_norm": 1.8138189315795898, "learning_rate": 7.6275104739783015e-06, "loss": 0.8221, "step": 9744 }, { "epoch": 0.3442109771335473, "grad_norm": 2.621643543243408, "learning_rate": 7.627023795181629e-06, "loss": 0.8105, "step": 9745 }, { "epoch": 0.3442462989372552, "grad_norm": 1.8807724714279175, "learning_rate": 7.626537082002963e-06, "loss": 0.7913, "step": 9746 }, { "epoch": 0.34428162074096313, "grad_norm": 1.640830397605896, "learning_rate": 7.626050334448676e-06, "loss": 0.8102, "step": 9747 }, { "epoch": 0.34431694254467105, "grad_norm": 1.00904381275177, "learning_rate": 7.625563552525133e-06, "loss": 0.6027, "step": 9748 }, { "epoch": 0.34435226434837896, "grad_norm": 1.7114750146865845, "learning_rate": 7.625076736238708e-06, "loss": 0.8505, "step": 9749 }, { "epoch": 0.34438758615208687, "grad_norm": 1.6210678815841675, "learning_rate": 7.624589885595771e-06, "loss": 0.8056, "step": 9750 }, { "epoch": 0.3444229079557948, "grad_norm": 1.659584403038025, "learning_rate": 7.624103000602696e-06, "loss": 0.8275, "step": 9751 }, { "epoch": 0.3444582297595027, "grad_norm": 1.6970012187957764, "learning_rate": 7.623616081265855e-06, "loss": 0.7977, "step": 9752 }, { "epoch": 0.3444935515632106, "grad_norm": 1.5736008882522583, "learning_rate": 7.62312912759162e-06, "loss": 0.8287, "step": 9753 }, { "epoch": 0.3445288733669185, "grad_norm": 1.7347979545593262, "learning_rate": 7.622642139586363e-06, "loss": 0.8363, "step": 9754 }, { "epoch": 0.34456419517062636, "grad_norm": 1.6091865301132202, "learning_rate": 7.622155117256459e-06, "loss": 0.8012, "step": 9755 }, { "epoch": 0.3445995169743343, "grad_norm": 1.5343518257141113, "learning_rate": 7.621668060608282e-06, "loss": 0.8144, "step": 9756 }, { "epoch": 0.3446348387780422, "grad_norm": 1.6453250646591187, "learning_rate": 7.621180969648208e-06, "loss": 0.823, "step": 9757 }, { "epoch": 0.3446701605817501, "grad_norm": 1.525298833847046, "learning_rate": 7.620693844382606e-06, "loss": 0.8001, "step": 9758 }, { "epoch": 0.344705482385458, "grad_norm": 1.8483476638793945, "learning_rate": 7.620206684817859e-06, "loss": 0.8176, "step": 9759 }, { "epoch": 0.3447408041891659, "grad_norm": 1.7162622213363647, "learning_rate": 7.619719490960337e-06, "loss": 0.8397, "step": 9760 }, { "epoch": 0.3447761259928738, "grad_norm": 1.8012385368347168, "learning_rate": 7.6192322628164205e-06, "loss": 0.8022, "step": 9761 }, { "epoch": 0.34481144779658174, "grad_norm": 1.5642398595809937, "learning_rate": 7.618745000392485e-06, "loss": 0.7878, "step": 9762 }, { "epoch": 0.34484676960028965, "grad_norm": 1.774936318397522, "learning_rate": 7.618257703694907e-06, "loss": 0.7889, "step": 9763 }, { "epoch": 0.34488209140399756, "grad_norm": 1.6842081546783447, "learning_rate": 7.617770372730063e-06, "loss": 0.8108, "step": 9764 }, { "epoch": 0.34491741320770547, "grad_norm": 1.6610924005508423, "learning_rate": 7.6172830075043325e-06, "loss": 0.8379, "step": 9765 }, { "epoch": 0.3449527350114134, "grad_norm": 1.7053707838058472, "learning_rate": 7.6167956080240945e-06, "loss": 0.7921, "step": 9766 }, { "epoch": 0.3449880568151213, "grad_norm": 1.9058263301849365, "learning_rate": 7.616308174295728e-06, "loss": 0.7682, "step": 9767 }, { "epoch": 0.34502337861882915, "grad_norm": 2.0475451946258545, "learning_rate": 7.615820706325612e-06, "loss": 0.8185, "step": 9768 }, { "epoch": 0.34505870042253706, "grad_norm": 1.8263909816741943, "learning_rate": 7.615333204120126e-06, "loss": 0.8559, "step": 9769 }, { "epoch": 0.34509402222624497, "grad_norm": 2.116755247116089, "learning_rate": 7.61484566768565e-06, "loss": 0.8296, "step": 9770 }, { "epoch": 0.3451293440299529, "grad_norm": 1.7874267101287842, "learning_rate": 7.614358097028567e-06, "loss": 0.8215, "step": 9771 }, { "epoch": 0.3451646658336608, "grad_norm": 1.5382461547851562, "learning_rate": 7.613870492155255e-06, "loss": 0.821, "step": 9772 }, { "epoch": 0.3451999876373687, "grad_norm": 1.5681883096694946, "learning_rate": 7.613382853072098e-06, "loss": 0.8047, "step": 9773 }, { "epoch": 0.3452353094410766, "grad_norm": 1.7290467023849487, "learning_rate": 7.6128951797854765e-06, "loss": 0.8035, "step": 9774 }, { "epoch": 0.3452706312447845, "grad_norm": 1.8711533546447754, "learning_rate": 7.612407472301777e-06, "loss": 0.8122, "step": 9775 }, { "epoch": 0.34530595304849243, "grad_norm": 1.6519917249679565, "learning_rate": 7.611919730627377e-06, "loss": 0.8117, "step": 9776 }, { "epoch": 0.34534127485220034, "grad_norm": 1.6080596446990967, "learning_rate": 7.611431954768662e-06, "loss": 0.7831, "step": 9777 }, { "epoch": 0.34537659665590825, "grad_norm": 1.6894371509552002, "learning_rate": 7.610944144732019e-06, "loss": 0.8052, "step": 9778 }, { "epoch": 0.34541191845961616, "grad_norm": 1.9012359380722046, "learning_rate": 7.610456300523828e-06, "loss": 0.8367, "step": 9779 }, { "epoch": 0.3454472402633241, "grad_norm": 1.9307981729507446, "learning_rate": 7.609968422150475e-06, "loss": 0.8398, "step": 9780 }, { "epoch": 0.34548256206703193, "grad_norm": 1.7898362874984741, "learning_rate": 7.609480509618348e-06, "loss": 0.8507, "step": 9781 }, { "epoch": 0.34551788387073984, "grad_norm": 1.7593128681182861, "learning_rate": 7.608992562933829e-06, "loss": 0.8167, "step": 9782 }, { "epoch": 0.34555320567444775, "grad_norm": 2.4810376167297363, "learning_rate": 7.608504582103307e-06, "loss": 0.8414, "step": 9783 }, { "epoch": 0.34558852747815566, "grad_norm": 2.431701183319092, "learning_rate": 7.6080165671331675e-06, "loss": 0.7754, "step": 9784 }, { "epoch": 0.34562384928186357, "grad_norm": 1.627166509628296, "learning_rate": 7.607528518029797e-06, "loss": 0.8436, "step": 9785 }, { "epoch": 0.3456591710855715, "grad_norm": 1.662139654159546, "learning_rate": 7.607040434799583e-06, "loss": 0.8163, "step": 9786 }, { "epoch": 0.3456944928892794, "grad_norm": 1.777334213256836, "learning_rate": 7.606552317448913e-06, "loss": 0.8304, "step": 9787 }, { "epoch": 0.3457298146929873, "grad_norm": 1.6542260646820068, "learning_rate": 7.606064165984177e-06, "loss": 0.8181, "step": 9788 }, { "epoch": 0.3457651364966952, "grad_norm": 1.68488347530365, "learning_rate": 7.605575980411763e-06, "loss": 0.8051, "step": 9789 }, { "epoch": 0.3458004583004031, "grad_norm": 1.7125219106674194, "learning_rate": 7.605087760738061e-06, "loss": 0.8457, "step": 9790 }, { "epoch": 0.34583578010411103, "grad_norm": 1.612245798110962, "learning_rate": 7.60459950696946e-06, "loss": 0.8074, "step": 9791 }, { "epoch": 0.34587110190781895, "grad_norm": 1.562088131904602, "learning_rate": 7.604111219112351e-06, "loss": 0.806, "step": 9792 }, { "epoch": 0.34590642371152686, "grad_norm": 1.706037998199463, "learning_rate": 7.603622897173123e-06, "loss": 0.7943, "step": 9793 }, { "epoch": 0.3459417455152347, "grad_norm": 1.8026105165481567, "learning_rate": 7.603134541158168e-06, "loss": 0.8021, "step": 9794 }, { "epoch": 0.3459770673189426, "grad_norm": 1.6066220998764038, "learning_rate": 7.602646151073876e-06, "loss": 0.7868, "step": 9795 }, { "epoch": 0.34601238912265053, "grad_norm": 1.6433155536651611, "learning_rate": 7.602157726926642e-06, "loss": 0.8235, "step": 9796 }, { "epoch": 0.34604771092635844, "grad_norm": 1.5852354764938354, "learning_rate": 7.601669268722857e-06, "loss": 0.8028, "step": 9797 }, { "epoch": 0.34608303273006635, "grad_norm": 1.9744118452072144, "learning_rate": 7.601180776468913e-06, "loss": 0.8186, "step": 9798 }, { "epoch": 0.34611835453377426, "grad_norm": 1.9272115230560303, "learning_rate": 7.600692250171204e-06, "loss": 0.8126, "step": 9799 }, { "epoch": 0.3461536763374822, "grad_norm": 1.6395713090896606, "learning_rate": 7.6002036898361255e-06, "loss": 0.8263, "step": 9800 }, { "epoch": 0.3461889981411901, "grad_norm": 1.7787986993789673, "learning_rate": 7.599715095470067e-06, "loss": 0.8604, "step": 9801 }, { "epoch": 0.346224319944898, "grad_norm": 1.7430610656738281, "learning_rate": 7.599226467079428e-06, "loss": 0.8229, "step": 9802 }, { "epoch": 0.3462596417486059, "grad_norm": 1.9558937549591064, "learning_rate": 7.5987378046706e-06, "loss": 0.8354, "step": 9803 }, { "epoch": 0.3462949635523138, "grad_norm": 1.8077861070632935, "learning_rate": 7.598249108249982e-06, "loss": 0.8296, "step": 9804 }, { "epoch": 0.3463302853560217, "grad_norm": 1.8892877101898193, "learning_rate": 7.5977603778239675e-06, "loss": 0.8163, "step": 9805 }, { "epoch": 0.34636560715972964, "grad_norm": 1.5008025169372559, "learning_rate": 7.597271613398953e-06, "loss": 0.8044, "step": 9806 }, { "epoch": 0.3464009289634375, "grad_norm": 1.6400692462921143, "learning_rate": 7.596782814981336e-06, "loss": 0.8385, "step": 9807 }, { "epoch": 0.3464362507671454, "grad_norm": 1.5422487258911133, "learning_rate": 7.596293982577513e-06, "loss": 0.8704, "step": 9808 }, { "epoch": 0.3464715725708533, "grad_norm": 1.9531341791152954, "learning_rate": 7.595805116193883e-06, "loss": 0.8436, "step": 9809 }, { "epoch": 0.3465068943745612, "grad_norm": 1.6630375385284424, "learning_rate": 7.595316215836843e-06, "loss": 0.821, "step": 9810 }, { "epoch": 0.34654221617826914, "grad_norm": 1.667089819908142, "learning_rate": 7.594827281512794e-06, "loss": 0.8263, "step": 9811 }, { "epoch": 0.34657753798197705, "grad_norm": 1.9077848196029663, "learning_rate": 7.594338313228134e-06, "loss": 0.8492, "step": 9812 }, { "epoch": 0.34661285978568496, "grad_norm": 1.5960416793823242, "learning_rate": 7.593849310989259e-06, "loss": 0.8346, "step": 9813 }, { "epoch": 0.34664818158939287, "grad_norm": 1.6621264219284058, "learning_rate": 7.593360274802573e-06, "loss": 0.8246, "step": 9814 }, { "epoch": 0.3466835033931008, "grad_norm": 1.7337723970413208, "learning_rate": 7.5928712046744745e-06, "loss": 0.7994, "step": 9815 }, { "epoch": 0.3467188251968087, "grad_norm": 1.5650166273117065, "learning_rate": 7.592382100611365e-06, "loss": 0.8427, "step": 9816 }, { "epoch": 0.3467541470005166, "grad_norm": 1.7446589469909668, "learning_rate": 7.591892962619647e-06, "loss": 0.8242, "step": 9817 }, { "epoch": 0.3467894688042245, "grad_norm": 2.0222973823547363, "learning_rate": 7.59140379070572e-06, "loss": 0.7802, "step": 9818 }, { "epoch": 0.3468247906079324, "grad_norm": 1.7217351198196411, "learning_rate": 7.590914584875988e-06, "loss": 0.83, "step": 9819 }, { "epoch": 0.3468601124116403, "grad_norm": 1.5686594247817993, "learning_rate": 7.5904253451368536e-06, "loss": 0.8051, "step": 9820 }, { "epoch": 0.3468954342153482, "grad_norm": 1.7828426361083984, "learning_rate": 7.589936071494719e-06, "loss": 0.7872, "step": 9821 }, { "epoch": 0.3469307560190561, "grad_norm": 1.5781469345092773, "learning_rate": 7.589446763955988e-06, "loss": 0.8244, "step": 9822 }, { "epoch": 0.346966077822764, "grad_norm": 1.8030481338500977, "learning_rate": 7.588957422527063e-06, "loss": 0.859, "step": 9823 }, { "epoch": 0.3470013996264719, "grad_norm": 1.4808827638626099, "learning_rate": 7.588468047214351e-06, "loss": 0.8073, "step": 9824 }, { "epoch": 0.34703672143017983, "grad_norm": 1.7340900897979736, "learning_rate": 7.587978638024256e-06, "loss": 0.8048, "step": 9825 }, { "epoch": 0.34707204323388774, "grad_norm": 1.6105983257293701, "learning_rate": 7.5874891949631825e-06, "loss": 0.821, "step": 9826 }, { "epoch": 0.34710736503759565, "grad_norm": 1.60141122341156, "learning_rate": 7.5869997180375375e-06, "loss": 0.8182, "step": 9827 }, { "epoch": 0.34714268684130356, "grad_norm": 1.6711498498916626, "learning_rate": 7.586510207253726e-06, "loss": 0.7791, "step": 9828 }, { "epoch": 0.34717800864501147, "grad_norm": 1.6913394927978516, "learning_rate": 7.586020662618155e-06, "loss": 0.7818, "step": 9829 }, { "epoch": 0.3472133304487194, "grad_norm": 1.8736149072647095, "learning_rate": 7.585531084137232e-06, "loss": 0.8185, "step": 9830 }, { "epoch": 0.3472486522524273, "grad_norm": 1.7654088735580444, "learning_rate": 7.585041471817365e-06, "loss": 0.8405, "step": 9831 }, { "epoch": 0.3472839740561352, "grad_norm": 1.5414146184921265, "learning_rate": 7.58455182566496e-06, "loss": 0.8101, "step": 9832 }, { "epoch": 0.34731929585984306, "grad_norm": 1.6792316436767578, "learning_rate": 7.584062145686426e-06, "loss": 0.8071, "step": 9833 }, { "epoch": 0.34735461766355097, "grad_norm": 1.6555832624435425, "learning_rate": 7.583572431888174e-06, "loss": 0.8185, "step": 9834 }, { "epoch": 0.3473899394672589, "grad_norm": 1.5584717988967896, "learning_rate": 7.58308268427661e-06, "loss": 0.8474, "step": 9835 }, { "epoch": 0.3474252612709668, "grad_norm": 1.5516074895858765, "learning_rate": 7.582592902858147e-06, "loss": 0.8038, "step": 9836 }, { "epoch": 0.3474605830746747, "grad_norm": 1.6618748903274536, "learning_rate": 7.582103087639192e-06, "loss": 0.8075, "step": 9837 }, { "epoch": 0.3474959048783826, "grad_norm": 1.8161284923553467, "learning_rate": 7.5816132386261584e-06, "loss": 0.7953, "step": 9838 }, { "epoch": 0.3475312266820905, "grad_norm": 1.4808074235916138, "learning_rate": 7.581123355825457e-06, "loss": 0.7803, "step": 9839 }, { "epoch": 0.34756654848579843, "grad_norm": 1.710051417350769, "learning_rate": 7.580633439243496e-06, "loss": 0.831, "step": 9840 }, { "epoch": 0.34760187028950634, "grad_norm": 1.7642388343811035, "learning_rate": 7.580143488886691e-06, "loss": 0.8167, "step": 9841 }, { "epoch": 0.34763719209321425, "grad_norm": 1.7351747751235962, "learning_rate": 7.5796535047614525e-06, "loss": 0.7757, "step": 9842 }, { "epoch": 0.34767251389692216, "grad_norm": 1.6627857685089111, "learning_rate": 7.579163486874194e-06, "loss": 0.8357, "step": 9843 }, { "epoch": 0.3477078357006301, "grad_norm": 1.943377137184143, "learning_rate": 7.578673435231328e-06, "loss": 0.8571, "step": 9844 }, { "epoch": 0.347743157504338, "grad_norm": 1.5663776397705078, "learning_rate": 7.57818334983927e-06, "loss": 0.8167, "step": 9845 }, { "epoch": 0.34777847930804584, "grad_norm": 1.512721061706543, "learning_rate": 7.577693230704433e-06, "loss": 0.7938, "step": 9846 }, { "epoch": 0.34781380111175375, "grad_norm": 1.7625397443771362, "learning_rate": 7.577203077833231e-06, "loss": 0.8319, "step": 9847 }, { "epoch": 0.34784912291546166, "grad_norm": 1.6166187524795532, "learning_rate": 7.576712891232079e-06, "loss": 0.7975, "step": 9848 }, { "epoch": 0.34788444471916957, "grad_norm": 1.9370982646942139, "learning_rate": 7.576222670907394e-06, "loss": 0.7775, "step": 9849 }, { "epoch": 0.3479197665228775, "grad_norm": 1.7851041555404663, "learning_rate": 7.57573241686559e-06, "loss": 0.7953, "step": 9850 }, { "epoch": 0.3479550883265854, "grad_norm": 2.0537707805633545, "learning_rate": 7.575242129113085e-06, "loss": 0.8164, "step": 9851 }, { "epoch": 0.3479904101302933, "grad_norm": 1.6919838190078735, "learning_rate": 7.574751807656296e-06, "loss": 0.8179, "step": 9852 }, { "epoch": 0.3480257319340012, "grad_norm": 1.6257063150405884, "learning_rate": 7.574261452501637e-06, "loss": 0.8236, "step": 9853 }, { "epoch": 0.3480610537377091, "grad_norm": 1.6735464334487915, "learning_rate": 7.573771063655528e-06, "loss": 0.823, "step": 9854 }, { "epoch": 0.34809637554141704, "grad_norm": 1.5743242502212524, "learning_rate": 7.57328064112439e-06, "loss": 0.789, "step": 9855 }, { "epoch": 0.34813169734512495, "grad_norm": 1.5332539081573486, "learning_rate": 7.572790184914636e-06, "loss": 0.8034, "step": 9856 }, { "epoch": 0.34816701914883286, "grad_norm": 1.7788537740707397, "learning_rate": 7.57229969503269e-06, "loss": 0.8221, "step": 9857 }, { "epoch": 0.34820234095254077, "grad_norm": 1.6953595876693726, "learning_rate": 7.5718091714849674e-06, "loss": 0.8341, "step": 9858 }, { "epoch": 0.3482376627562486, "grad_norm": 1.621147871017456, "learning_rate": 7.571318614277892e-06, "loss": 0.8606, "step": 9859 }, { "epoch": 0.34827298455995653, "grad_norm": 1.643412470817566, "learning_rate": 7.57082802341788e-06, "loss": 0.8223, "step": 9860 }, { "epoch": 0.34830830636366444, "grad_norm": 1.6007460355758667, "learning_rate": 7.570337398911354e-06, "loss": 0.8411, "step": 9861 }, { "epoch": 0.34834362816737235, "grad_norm": 1.0689278841018677, "learning_rate": 7.569846740764735e-06, "loss": 0.6057, "step": 9862 }, { "epoch": 0.34837894997108027, "grad_norm": 1.6807962656021118, "learning_rate": 7.569356048984447e-06, "loss": 0.8123, "step": 9863 }, { "epoch": 0.3484142717747882, "grad_norm": 1.846814513206482, "learning_rate": 7.568865323576906e-06, "loss": 0.8154, "step": 9864 }, { "epoch": 0.3484495935784961, "grad_norm": 1.504210352897644, "learning_rate": 7.568374564548542e-06, "loss": 0.8008, "step": 9865 }, { "epoch": 0.348484915382204, "grad_norm": 1.7655001878738403, "learning_rate": 7.567883771905773e-06, "loss": 0.8119, "step": 9866 }, { "epoch": 0.3485202371859119, "grad_norm": 1.6246833801269531, "learning_rate": 7.567392945655026e-06, "loss": 0.8405, "step": 9867 }, { "epoch": 0.3485555589896198, "grad_norm": 1.704748511314392, "learning_rate": 7.566902085802721e-06, "loss": 0.8635, "step": 9868 }, { "epoch": 0.34859088079332773, "grad_norm": 1.9485605955123901, "learning_rate": 7.566411192355284e-06, "loss": 0.8125, "step": 9869 }, { "epoch": 0.34862620259703564, "grad_norm": 1.5687509775161743, "learning_rate": 7.56592026531914e-06, "loss": 0.8333, "step": 9870 }, { "epoch": 0.34866152440074355, "grad_norm": 1.7413383722305298, "learning_rate": 7.565429304700713e-06, "loss": 0.8553, "step": 9871 }, { "epoch": 0.3486968462044514, "grad_norm": 1.6180943250656128, "learning_rate": 7.564938310506431e-06, "loss": 0.8495, "step": 9872 }, { "epoch": 0.3487321680081593, "grad_norm": 1.5455180406570435, "learning_rate": 7.564447282742719e-06, "loss": 0.801, "step": 9873 }, { "epoch": 0.3487674898118672, "grad_norm": 1.486573338508606, "learning_rate": 7.563956221416003e-06, "loss": 0.7981, "step": 9874 }, { "epoch": 0.34880281161557514, "grad_norm": 1.7214443683624268, "learning_rate": 7.563465126532707e-06, "loss": 0.8194, "step": 9875 }, { "epoch": 0.34883813341928305, "grad_norm": 1.656131625175476, "learning_rate": 7.562973998099265e-06, "loss": 0.8343, "step": 9876 }, { "epoch": 0.34887345522299096, "grad_norm": 1.858914852142334, "learning_rate": 7.562482836122099e-06, "loss": 0.8366, "step": 9877 }, { "epoch": 0.34890877702669887, "grad_norm": 1.5926084518432617, "learning_rate": 7.56199164060764e-06, "loss": 0.8147, "step": 9878 }, { "epoch": 0.3489440988304068, "grad_norm": 1.5126012563705444, "learning_rate": 7.561500411562315e-06, "loss": 0.8134, "step": 9879 }, { "epoch": 0.3489794206341147, "grad_norm": 1.7065298557281494, "learning_rate": 7.561009148992556e-06, "loss": 0.7973, "step": 9880 }, { "epoch": 0.3490147424378226, "grad_norm": 1.0825746059417725, "learning_rate": 7.560517852904789e-06, "loss": 0.594, "step": 9881 }, { "epoch": 0.3490500642415305, "grad_norm": 0.9790843725204468, "learning_rate": 7.560026523305446e-06, "loss": 0.604, "step": 9882 }, { "epoch": 0.3490853860452384, "grad_norm": 1.526415467262268, "learning_rate": 7.559535160200957e-06, "loss": 0.8294, "step": 9883 }, { "epoch": 0.34912070784894633, "grad_norm": 1.6826401948928833, "learning_rate": 7.559043763597753e-06, "loss": 0.8243, "step": 9884 }, { "epoch": 0.3491560296526542, "grad_norm": 1.6254240274429321, "learning_rate": 7.558552333502266e-06, "loss": 0.8468, "step": 9885 }, { "epoch": 0.3491913514563621, "grad_norm": 1.8154995441436768, "learning_rate": 7.558060869920927e-06, "loss": 0.8202, "step": 9886 }, { "epoch": 0.34922667326007, "grad_norm": 1.6568892002105713, "learning_rate": 7.557569372860168e-06, "loss": 0.7993, "step": 9887 }, { "epoch": 0.3492619950637779, "grad_norm": 1.686822772026062, "learning_rate": 7.557077842326421e-06, "loss": 0.8396, "step": 9888 }, { "epoch": 0.34929731686748583, "grad_norm": 1.762631893157959, "learning_rate": 7.5565862783261214e-06, "loss": 0.867, "step": 9889 }, { "epoch": 0.34933263867119374, "grad_norm": 1.671546220779419, "learning_rate": 7.5560946808657e-06, "loss": 0.8214, "step": 9890 }, { "epoch": 0.34936796047490165, "grad_norm": 1.5982918739318848, "learning_rate": 7.555603049951592e-06, "loss": 0.8262, "step": 9891 }, { "epoch": 0.34940328227860956, "grad_norm": 1.4561628103256226, "learning_rate": 7.555111385590233e-06, "loss": 0.7834, "step": 9892 }, { "epoch": 0.34943860408231747, "grad_norm": 1.7290502786636353, "learning_rate": 7.554619687788054e-06, "loss": 0.8437, "step": 9893 }, { "epoch": 0.3494739258860254, "grad_norm": 1.802891731262207, "learning_rate": 7.554127956551493e-06, "loss": 0.828, "step": 9894 }, { "epoch": 0.3495092476897333, "grad_norm": 1.7096346616744995, "learning_rate": 7.553636191886988e-06, "loss": 0.817, "step": 9895 }, { "epoch": 0.3495445694934412, "grad_norm": 1.6473100185394287, "learning_rate": 7.553144393800971e-06, "loss": 0.8093, "step": 9896 }, { "epoch": 0.3495798912971491, "grad_norm": 1.7634323835372925, "learning_rate": 7.55265256229988e-06, "loss": 0.8255, "step": 9897 }, { "epoch": 0.34961521310085697, "grad_norm": 1.6006276607513428, "learning_rate": 7.552160697390152e-06, "loss": 0.8233, "step": 9898 }, { "epoch": 0.3496505349045649, "grad_norm": 1.6464601755142212, "learning_rate": 7.551668799078224e-06, "loss": 0.8374, "step": 9899 }, { "epoch": 0.3496858567082728, "grad_norm": 1.6403484344482422, "learning_rate": 7.551176867370535e-06, "loss": 0.8143, "step": 9900 }, { "epoch": 0.3497211785119807, "grad_norm": 1.589842677116394, "learning_rate": 7.550684902273524e-06, "loss": 0.8176, "step": 9901 }, { "epoch": 0.3497565003156886, "grad_norm": 1.6230603456497192, "learning_rate": 7.550192903793627e-06, "loss": 0.8313, "step": 9902 }, { "epoch": 0.3497918221193965, "grad_norm": 1.9039602279663086, "learning_rate": 7.5497008719372845e-06, "loss": 0.8086, "step": 9903 }, { "epoch": 0.34982714392310443, "grad_norm": 1.6950769424438477, "learning_rate": 7.549208806710939e-06, "loss": 0.8025, "step": 9904 }, { "epoch": 0.34986246572681234, "grad_norm": 1.774658441543579, "learning_rate": 7.548716708121024e-06, "loss": 0.8447, "step": 9905 }, { "epoch": 0.34989778753052025, "grad_norm": 1.8047139644622803, "learning_rate": 7.548224576173986e-06, "loss": 0.8305, "step": 9906 }, { "epoch": 0.34993310933422817, "grad_norm": 1.7793402671813965, "learning_rate": 7.547732410876264e-06, "loss": 0.8327, "step": 9907 }, { "epoch": 0.3499684311379361, "grad_norm": 1.7599114179611206, "learning_rate": 7.547240212234298e-06, "loss": 0.813, "step": 9908 }, { "epoch": 0.350003752941644, "grad_norm": 1.8215839862823486, "learning_rate": 7.546747980254532e-06, "loss": 0.804, "step": 9909 }, { "epoch": 0.3500390747453519, "grad_norm": 1.6579478979110718, "learning_rate": 7.546255714943407e-06, "loss": 0.8022, "step": 9910 }, { "epoch": 0.35007439654905975, "grad_norm": 1.783434271812439, "learning_rate": 7.545763416307364e-06, "loss": 0.8286, "step": 9911 }, { "epoch": 0.35010971835276766, "grad_norm": 1.7212677001953125, "learning_rate": 7.5452710843528495e-06, "loss": 0.8205, "step": 9912 }, { "epoch": 0.3501450401564756, "grad_norm": 1.5375617742538452, "learning_rate": 7.544778719086306e-06, "loss": 0.7993, "step": 9913 }, { "epoch": 0.3501803619601835, "grad_norm": 2.2379190921783447, "learning_rate": 7.5442863205141755e-06, "loss": 0.7905, "step": 9914 }, { "epoch": 0.3502156837638914, "grad_norm": 1.7000476121902466, "learning_rate": 7.543793888642906e-06, "loss": 0.8645, "step": 9915 }, { "epoch": 0.3502510055675993, "grad_norm": 1.876236081123352, "learning_rate": 7.543301423478938e-06, "loss": 0.8102, "step": 9916 }, { "epoch": 0.3502863273713072, "grad_norm": 1.7466282844543457, "learning_rate": 7.5428089250287204e-06, "loss": 0.8142, "step": 9917 }, { "epoch": 0.3503216491750151, "grad_norm": 1.6907966136932373, "learning_rate": 7.542316393298698e-06, "loss": 0.7869, "step": 9918 }, { "epoch": 0.35035697097872304, "grad_norm": 1.7721461057662964, "learning_rate": 7.541823828295315e-06, "loss": 0.8237, "step": 9919 }, { "epoch": 0.35039229278243095, "grad_norm": 2.333846092224121, "learning_rate": 7.541331230025021e-06, "loss": 0.8103, "step": 9920 }, { "epoch": 0.35042761458613886, "grad_norm": 1.7818737030029297, "learning_rate": 7.54083859849426e-06, "loss": 0.8067, "step": 9921 }, { "epoch": 0.35046293638984677, "grad_norm": 1.7753835916519165, "learning_rate": 7.540345933709482e-06, "loss": 0.852, "step": 9922 }, { "epoch": 0.3504982581935547, "grad_norm": 1.6552047729492188, "learning_rate": 7.5398532356771345e-06, "loss": 0.8469, "step": 9923 }, { "epoch": 0.35053357999726253, "grad_norm": 1.5447903871536255, "learning_rate": 7.5393605044036655e-06, "loss": 0.8291, "step": 9924 }, { "epoch": 0.35056890180097044, "grad_norm": 1.5154272317886353, "learning_rate": 7.538867739895522e-06, "loss": 0.8218, "step": 9925 }, { "epoch": 0.35060422360467836, "grad_norm": 1.6178866624832153, "learning_rate": 7.538374942159157e-06, "loss": 0.7647, "step": 9926 }, { "epoch": 0.35063954540838627, "grad_norm": 1.6316756010055542, "learning_rate": 7.537882111201016e-06, "loss": 0.8488, "step": 9927 }, { "epoch": 0.3506748672120942, "grad_norm": 1.0584808588027954, "learning_rate": 7.537389247027551e-06, "loss": 0.5595, "step": 9928 }, { "epoch": 0.3507101890158021, "grad_norm": 1.1204259395599365, "learning_rate": 7.536896349645212e-06, "loss": 0.6016, "step": 9929 }, { "epoch": 0.35074551081951, "grad_norm": 1.7677359580993652, "learning_rate": 7.536403419060452e-06, "loss": 0.8006, "step": 9930 }, { "epoch": 0.3507808326232179, "grad_norm": 1.5520296096801758, "learning_rate": 7.53591045527972e-06, "loss": 0.7922, "step": 9931 }, { "epoch": 0.3508161544269258, "grad_norm": 1.8862884044647217, "learning_rate": 7.53541745830947e-06, "loss": 0.844, "step": 9932 }, { "epoch": 0.35085147623063373, "grad_norm": 1.767791986465454, "learning_rate": 7.534924428156152e-06, "loss": 0.8272, "step": 9933 }, { "epoch": 0.35088679803434164, "grad_norm": 1.5941765308380127, "learning_rate": 7.534431364826218e-06, "loss": 0.8075, "step": 9934 }, { "epoch": 0.35092211983804955, "grad_norm": 2.0423800945281982, "learning_rate": 7.533938268326125e-06, "loss": 0.8134, "step": 9935 }, { "epoch": 0.35095744164175746, "grad_norm": 1.77082359790802, "learning_rate": 7.533445138662323e-06, "loss": 0.7822, "step": 9936 }, { "epoch": 0.3509927634454653, "grad_norm": 1.687608242034912, "learning_rate": 7.532951975841267e-06, "loss": 0.8158, "step": 9937 }, { "epoch": 0.3510280852491732, "grad_norm": 1.573456883430481, "learning_rate": 7.532458779869414e-06, "loss": 0.8036, "step": 9938 }, { "epoch": 0.35106340705288114, "grad_norm": 1.2708293199539185, "learning_rate": 7.531965550753215e-06, "loss": 0.623, "step": 9939 }, { "epoch": 0.35109872885658905, "grad_norm": 1.6341999769210815, "learning_rate": 7.531472288499126e-06, "loss": 0.7961, "step": 9940 }, { "epoch": 0.35113405066029696, "grad_norm": 1.702078104019165, "learning_rate": 7.530978993113605e-06, "loss": 0.8279, "step": 9941 }, { "epoch": 0.35116937246400487, "grad_norm": 1.6766431331634521, "learning_rate": 7.530485664603107e-06, "loss": 0.8426, "step": 9942 }, { "epoch": 0.3512046942677128, "grad_norm": 1.6925369501113892, "learning_rate": 7.529992302974086e-06, "loss": 0.8236, "step": 9943 }, { "epoch": 0.3512400160714207, "grad_norm": 1.7983336448669434, "learning_rate": 7.529498908233004e-06, "loss": 0.817, "step": 9944 }, { "epoch": 0.3512753378751286, "grad_norm": 1.6264115571975708, "learning_rate": 7.529005480386313e-06, "loss": 0.8021, "step": 9945 }, { "epoch": 0.3513106596788365, "grad_norm": 1.721604824066162, "learning_rate": 7.528512019440475e-06, "loss": 0.8277, "step": 9946 }, { "epoch": 0.3513459814825444, "grad_norm": 1.5477837324142456, "learning_rate": 7.5280185254019476e-06, "loss": 0.8241, "step": 9947 }, { "epoch": 0.35138130328625233, "grad_norm": 1.610116958618164, "learning_rate": 7.527524998277188e-06, "loss": 0.8096, "step": 9948 }, { "epoch": 0.35141662508996024, "grad_norm": 1.7036528587341309, "learning_rate": 7.527031438072658e-06, "loss": 0.8181, "step": 9949 }, { "epoch": 0.3514519468936681, "grad_norm": 1.6177382469177246, "learning_rate": 7.5265378447948145e-06, "loss": 0.8259, "step": 9950 }, { "epoch": 0.351487268697376, "grad_norm": 1.8646563291549683, "learning_rate": 7.526044218450118e-06, "loss": 0.8684, "step": 9951 }, { "epoch": 0.3515225905010839, "grad_norm": 1.685669183731079, "learning_rate": 7.52555055904503e-06, "loss": 0.8346, "step": 9952 }, { "epoch": 0.35155791230479183, "grad_norm": 1.7099969387054443, "learning_rate": 7.5250568665860115e-06, "loss": 0.8262, "step": 9953 }, { "epoch": 0.35159323410849974, "grad_norm": 1.6205101013183594, "learning_rate": 7.524563141079523e-06, "loss": 0.8029, "step": 9954 }, { "epoch": 0.35162855591220765, "grad_norm": 1.6884956359863281, "learning_rate": 7.524069382532026e-06, "loss": 0.7905, "step": 9955 }, { "epoch": 0.35166387771591556, "grad_norm": 1.7280097007751465, "learning_rate": 7.523575590949984e-06, "loss": 0.8231, "step": 9956 }, { "epoch": 0.3516991995196235, "grad_norm": 1.5986742973327637, "learning_rate": 7.523081766339859e-06, "loss": 0.7984, "step": 9957 }, { "epoch": 0.3517345213233314, "grad_norm": 1.6088511943817139, "learning_rate": 7.522587908708114e-06, "loss": 0.8202, "step": 9958 }, { "epoch": 0.3517698431270393, "grad_norm": 1.5517417192459106, "learning_rate": 7.522094018061213e-06, "loss": 0.8311, "step": 9959 }, { "epoch": 0.3518051649307472, "grad_norm": 1.7525713443756104, "learning_rate": 7.521600094405621e-06, "loss": 0.8348, "step": 9960 }, { "epoch": 0.3518404867344551, "grad_norm": 1.5455576181411743, "learning_rate": 7.521106137747799e-06, "loss": 0.781, "step": 9961 }, { "epoch": 0.351875808538163, "grad_norm": 1.6043980121612549, "learning_rate": 7.5206121480942155e-06, "loss": 0.8072, "step": 9962 }, { "epoch": 0.3519111303418709, "grad_norm": 1.624985933303833, "learning_rate": 7.520118125451331e-06, "loss": 0.7987, "step": 9963 }, { "epoch": 0.3519464521455788, "grad_norm": 1.7085720300674438, "learning_rate": 7.519624069825617e-06, "loss": 0.8032, "step": 9964 }, { "epoch": 0.3519817739492867, "grad_norm": 1.7279353141784668, "learning_rate": 7.519129981223536e-06, "loss": 0.8209, "step": 9965 }, { "epoch": 0.3520170957529946, "grad_norm": 1.6528936624526978, "learning_rate": 7.518635859651554e-06, "loss": 0.8483, "step": 9966 }, { "epoch": 0.3520524175567025, "grad_norm": 1.6132904291152954, "learning_rate": 7.518141705116141e-06, "loss": 0.8466, "step": 9967 }, { "epoch": 0.35208773936041043, "grad_norm": 1.0186794996261597, "learning_rate": 7.517647517623763e-06, "loss": 0.5864, "step": 9968 }, { "epoch": 0.35212306116411835, "grad_norm": 1.8037257194519043, "learning_rate": 7.517153297180886e-06, "loss": 0.8536, "step": 9969 }, { "epoch": 0.35215838296782626, "grad_norm": 1.7001909017562866, "learning_rate": 7.516659043793981e-06, "loss": 0.8182, "step": 9970 }, { "epoch": 0.35219370477153417, "grad_norm": 1.6836823225021362, "learning_rate": 7.5161647574695155e-06, "loss": 0.8296, "step": 9971 }, { "epoch": 0.3522290265752421, "grad_norm": 1.6418620347976685, "learning_rate": 7.5156704382139576e-06, "loss": 0.7928, "step": 9972 }, { "epoch": 0.35226434837895, "grad_norm": 1.6090539693832397, "learning_rate": 7.51517608603378e-06, "loss": 0.8368, "step": 9973 }, { "epoch": 0.3522996701826579, "grad_norm": 1.6192466020584106, "learning_rate": 7.5146817009354474e-06, "loss": 0.8197, "step": 9974 }, { "epoch": 0.3523349919863658, "grad_norm": 1.779045820236206, "learning_rate": 7.514187282925436e-06, "loss": 0.8069, "step": 9975 }, { "epoch": 0.35237031379007366, "grad_norm": 1.6558173894882202, "learning_rate": 7.5136928320102135e-06, "loss": 0.8392, "step": 9976 }, { "epoch": 0.3524056355937816, "grad_norm": 1.6888632774353027, "learning_rate": 7.5131983481962515e-06, "loss": 0.8047, "step": 9977 }, { "epoch": 0.3524409573974895, "grad_norm": 1.512380838394165, "learning_rate": 7.512703831490022e-06, "loss": 0.8311, "step": 9978 }, { "epoch": 0.3524762792011974, "grad_norm": 1.7251160144805908, "learning_rate": 7.512209281897996e-06, "loss": 0.846, "step": 9979 }, { "epoch": 0.3525116010049053, "grad_norm": 2.732719898223877, "learning_rate": 7.5117146994266485e-06, "loss": 0.8167, "step": 9980 }, { "epoch": 0.3525469228086132, "grad_norm": 1.8228687047958374, "learning_rate": 7.511220084082451e-06, "loss": 0.7799, "step": 9981 }, { "epoch": 0.3525822446123211, "grad_norm": 1.571420431137085, "learning_rate": 7.510725435871877e-06, "loss": 0.8197, "step": 9982 }, { "epoch": 0.35261756641602904, "grad_norm": 1.7305679321289062, "learning_rate": 7.5102307548014e-06, "loss": 0.7803, "step": 9983 }, { "epoch": 0.35265288821973695, "grad_norm": 1.7196067571640015, "learning_rate": 7.509736040877495e-06, "loss": 0.8508, "step": 9984 }, { "epoch": 0.35268821002344486, "grad_norm": 1.72420334815979, "learning_rate": 7.509241294106637e-06, "loss": 0.834, "step": 9985 }, { "epoch": 0.35272353182715277, "grad_norm": 1.7698355913162231, "learning_rate": 7.5087465144953e-06, "loss": 0.8192, "step": 9986 }, { "epoch": 0.3527588536308607, "grad_norm": 1.7400963306427002, "learning_rate": 7.50825170204996e-06, "loss": 0.8121, "step": 9987 }, { "epoch": 0.3527941754345686, "grad_norm": 1.733285903930664, "learning_rate": 7.507756856777096e-06, "loss": 0.817, "step": 9988 }, { "epoch": 0.35282949723827645, "grad_norm": 1.6508193016052246, "learning_rate": 7.507261978683178e-06, "loss": 0.8449, "step": 9989 }, { "epoch": 0.35286481904198436, "grad_norm": 1.5601766109466553, "learning_rate": 7.506767067774687e-06, "loss": 0.817, "step": 9990 }, { "epoch": 0.35290014084569227, "grad_norm": 1.7077975273132324, "learning_rate": 7.506272124058102e-06, "loss": 0.8064, "step": 9991 }, { "epoch": 0.3529354626494002, "grad_norm": 1.6435017585754395, "learning_rate": 7.505777147539897e-06, "loss": 0.8164, "step": 9992 }, { "epoch": 0.3529707844531081, "grad_norm": 1.6198952198028564, "learning_rate": 7.505282138226551e-06, "loss": 0.8144, "step": 9993 }, { "epoch": 0.353006106256816, "grad_norm": 1.5991054773330688, "learning_rate": 7.504787096124545e-06, "loss": 0.8492, "step": 9994 }, { "epoch": 0.3530414280605239, "grad_norm": 1.431686282157898, "learning_rate": 7.504292021240355e-06, "loss": 0.7718, "step": 9995 }, { "epoch": 0.3530767498642318, "grad_norm": 1.5997858047485352, "learning_rate": 7.503796913580463e-06, "loss": 0.8093, "step": 9996 }, { "epoch": 0.35311207166793973, "grad_norm": 1.6664999723434448, "learning_rate": 7.503301773151347e-06, "loss": 0.8055, "step": 9997 }, { "epoch": 0.35314739347164764, "grad_norm": 1.721942663192749, "learning_rate": 7.502806599959489e-06, "loss": 0.7853, "step": 9998 }, { "epoch": 0.35318271527535555, "grad_norm": 1.742141842842102, "learning_rate": 7.502311394011368e-06, "loss": 0.8289, "step": 9999 }, { "epoch": 0.35321803707906346, "grad_norm": 1.5527424812316895, "learning_rate": 7.501816155313464e-06, "loss": 0.7879, "step": 10000 }, { "epoch": 0.3532533588827714, "grad_norm": 1.6692975759506226, "learning_rate": 7.501320883872263e-06, "loss": 0.8284, "step": 10001 }, { "epoch": 0.35328868068647923, "grad_norm": 1.7579684257507324, "learning_rate": 7.500825579694243e-06, "loss": 0.8355, "step": 10002 }, { "epoch": 0.35332400249018714, "grad_norm": 2.3685996532440186, "learning_rate": 7.500330242785888e-06, "loss": 0.8178, "step": 10003 }, { "epoch": 0.35335932429389505, "grad_norm": 1.5604133605957031, "learning_rate": 7.499834873153682e-06, "loss": 0.8332, "step": 10004 }, { "epoch": 0.35339464609760296, "grad_norm": 1.728198766708374, "learning_rate": 7.499339470804106e-06, "loss": 0.8587, "step": 10005 }, { "epoch": 0.35342996790131087, "grad_norm": 1.7978260517120361, "learning_rate": 7.498844035743646e-06, "loss": 0.7828, "step": 10006 }, { "epoch": 0.3534652897050188, "grad_norm": 1.4618911743164062, "learning_rate": 7.498348567978783e-06, "loss": 0.808, "step": 10007 }, { "epoch": 0.3535006115087267, "grad_norm": 1.5829954147338867, "learning_rate": 7.497853067516006e-06, "loss": 0.8092, "step": 10008 }, { "epoch": 0.3535359333124346, "grad_norm": 0.9466282725334167, "learning_rate": 7.497357534361796e-06, "loss": 0.5749, "step": 10009 }, { "epoch": 0.3535712551161425, "grad_norm": 1.7215455770492554, "learning_rate": 7.496861968522641e-06, "loss": 0.8063, "step": 10010 }, { "epoch": 0.3536065769198504, "grad_norm": 1.6710541248321533, "learning_rate": 7.4963663700050234e-06, "loss": 0.8209, "step": 10011 }, { "epoch": 0.35364189872355833, "grad_norm": 1.7027732133865356, "learning_rate": 7.495870738815435e-06, "loss": 0.8234, "step": 10012 }, { "epoch": 0.35367722052726625, "grad_norm": 1.629604697227478, "learning_rate": 7.495375074960359e-06, "loss": 0.8244, "step": 10013 }, { "epoch": 0.35371254233097416, "grad_norm": 1.64569091796875, "learning_rate": 7.494879378446282e-06, "loss": 0.8191, "step": 10014 }, { "epoch": 0.353747864134682, "grad_norm": 1.8508000373840332, "learning_rate": 7.494383649279694e-06, "loss": 0.8424, "step": 10015 }, { "epoch": 0.3537831859383899, "grad_norm": 1.465283989906311, "learning_rate": 7.4938878874670795e-06, "loss": 0.791, "step": 10016 }, { "epoch": 0.35381850774209783, "grad_norm": 1.6552889347076416, "learning_rate": 7.493392093014931e-06, "loss": 0.8311, "step": 10017 }, { "epoch": 0.35385382954580574, "grad_norm": 1.6990355253219604, "learning_rate": 7.492896265929736e-06, "loss": 0.832, "step": 10018 }, { "epoch": 0.35388915134951365, "grad_norm": 1.579833984375, "learning_rate": 7.492400406217981e-06, "loss": 0.7912, "step": 10019 }, { "epoch": 0.35392447315322156, "grad_norm": 1.668305516242981, "learning_rate": 7.49190451388616e-06, "loss": 0.796, "step": 10020 }, { "epoch": 0.3539597949569295, "grad_norm": 1.6746735572814941, "learning_rate": 7.49140858894076e-06, "loss": 0.7999, "step": 10021 }, { "epoch": 0.3539951167606374, "grad_norm": 1.6593226194381714, "learning_rate": 7.490912631388273e-06, "loss": 0.7907, "step": 10022 }, { "epoch": 0.3540304385643453, "grad_norm": 1.6637893915176392, "learning_rate": 7.49041664123519e-06, "loss": 0.8229, "step": 10023 }, { "epoch": 0.3540657603680532, "grad_norm": 1.6131750345230103, "learning_rate": 7.489920618488002e-06, "loss": 0.8421, "step": 10024 }, { "epoch": 0.3541010821717611, "grad_norm": 1.5727534294128418, "learning_rate": 7.489424563153203e-06, "loss": 0.819, "step": 10025 }, { "epoch": 0.354136403975469, "grad_norm": 1.6141663789749146, "learning_rate": 7.488928475237279e-06, "loss": 0.792, "step": 10026 }, { "epoch": 0.35417172577917694, "grad_norm": 1.807745337486267, "learning_rate": 7.488432354746731e-06, "loss": 0.8195, "step": 10027 }, { "epoch": 0.3542070475828848, "grad_norm": 1.7171478271484375, "learning_rate": 7.487936201688046e-06, "loss": 0.8244, "step": 10028 }, { "epoch": 0.3542423693865927, "grad_norm": 1.968160629272461, "learning_rate": 7.487440016067719e-06, "loss": 0.7848, "step": 10029 }, { "epoch": 0.3542776911903006, "grad_norm": 1.8688998222351074, "learning_rate": 7.4869437978922465e-06, "loss": 0.7944, "step": 10030 }, { "epoch": 0.3543130129940085, "grad_norm": 1.8248904943466187, "learning_rate": 7.486447547168121e-06, "loss": 0.8188, "step": 10031 }, { "epoch": 0.35434833479771644, "grad_norm": 1.6298753023147583, "learning_rate": 7.485951263901836e-06, "loss": 0.8594, "step": 10032 }, { "epoch": 0.35438365660142435, "grad_norm": 1.6667977571487427, "learning_rate": 7.485454948099889e-06, "loss": 0.818, "step": 10033 }, { "epoch": 0.35441897840513226, "grad_norm": 1.7763729095458984, "learning_rate": 7.4849585997687745e-06, "loss": 0.8149, "step": 10034 }, { "epoch": 0.35445430020884017, "grad_norm": 1.6504665613174438, "learning_rate": 7.48446221891499e-06, "loss": 0.8354, "step": 10035 }, { "epoch": 0.3544896220125481, "grad_norm": 5.204479694366455, "learning_rate": 7.48396580554503e-06, "loss": 0.8215, "step": 10036 }, { "epoch": 0.354524943816256, "grad_norm": 1.672054648399353, "learning_rate": 7.483469359665392e-06, "loss": 0.8249, "step": 10037 }, { "epoch": 0.3545602656199639, "grad_norm": 1.9264953136444092, "learning_rate": 7.482972881282576e-06, "loss": 0.8068, "step": 10038 }, { "epoch": 0.3545955874236718, "grad_norm": 1.5835657119750977, "learning_rate": 7.4824763704030754e-06, "loss": 0.8322, "step": 10039 }, { "epoch": 0.3546309092273797, "grad_norm": 3.2218639850616455, "learning_rate": 7.481979827033391e-06, "loss": 0.8606, "step": 10040 }, { "epoch": 0.3546662310310876, "grad_norm": 1.7584154605865479, "learning_rate": 7.481483251180021e-06, "loss": 0.841, "step": 10041 }, { "epoch": 0.3547015528347955, "grad_norm": 1.7984105348587036, "learning_rate": 7.480986642849467e-06, "loss": 0.8121, "step": 10042 }, { "epoch": 0.3547368746385034, "grad_norm": 1.6380006074905396, "learning_rate": 7.4804900020482255e-06, "loss": 0.8236, "step": 10043 }, { "epoch": 0.3547721964422113, "grad_norm": 1.710442066192627, "learning_rate": 7.4799933287827956e-06, "loss": 0.8337, "step": 10044 }, { "epoch": 0.3548075182459192, "grad_norm": 1.6615756750106812, "learning_rate": 7.479496623059679e-06, "loss": 0.8348, "step": 10045 }, { "epoch": 0.35484284004962713, "grad_norm": 1.6835509538650513, "learning_rate": 7.478999884885378e-06, "loss": 0.7948, "step": 10046 }, { "epoch": 0.35487816185333504, "grad_norm": 1.600923776626587, "learning_rate": 7.478503114266392e-06, "loss": 0.7915, "step": 10047 }, { "epoch": 0.35491348365704295, "grad_norm": 1.6316516399383545, "learning_rate": 7.478006311209223e-06, "loss": 0.7845, "step": 10048 }, { "epoch": 0.35494880546075086, "grad_norm": 1.8482260704040527, "learning_rate": 7.477509475720373e-06, "loss": 0.8388, "step": 10049 }, { "epoch": 0.35498412726445877, "grad_norm": 1.5465649366378784, "learning_rate": 7.477012607806345e-06, "loss": 0.7989, "step": 10050 }, { "epoch": 0.3550194490681667, "grad_norm": 1.55811607837677, "learning_rate": 7.476515707473642e-06, "loss": 0.7917, "step": 10051 }, { "epoch": 0.3550547708718746, "grad_norm": 1.970558762550354, "learning_rate": 7.476018774728768e-06, "loss": 0.8314, "step": 10052 }, { "epoch": 0.3550900926755825, "grad_norm": 1.8847779035568237, "learning_rate": 7.475521809578223e-06, "loss": 0.7932, "step": 10053 }, { "epoch": 0.35512541447929036, "grad_norm": 3.748788595199585, "learning_rate": 7.4750248120285164e-06, "loss": 0.7902, "step": 10054 }, { "epoch": 0.35516073628299827, "grad_norm": 1.6838536262512207, "learning_rate": 7.474527782086149e-06, "loss": 0.7997, "step": 10055 }, { "epoch": 0.3551960580867062, "grad_norm": 1.5968165397644043, "learning_rate": 7.474030719757628e-06, "loss": 0.8327, "step": 10056 }, { "epoch": 0.3552313798904141, "grad_norm": 1.7181452512741089, "learning_rate": 7.473533625049457e-06, "loss": 0.8215, "step": 10057 }, { "epoch": 0.355266701694122, "grad_norm": 1.6441057920455933, "learning_rate": 7.4730364979681426e-06, "loss": 0.8269, "step": 10058 }, { "epoch": 0.3553020234978299, "grad_norm": 1.7453351020812988, "learning_rate": 7.472539338520193e-06, "loss": 0.8515, "step": 10059 }, { "epoch": 0.3553373453015378, "grad_norm": 1.931858777999878, "learning_rate": 7.472042146712112e-06, "loss": 0.8241, "step": 10060 }, { "epoch": 0.35537266710524573, "grad_norm": 2.054525136947632, "learning_rate": 7.471544922550409e-06, "loss": 0.7696, "step": 10061 }, { "epoch": 0.35540798890895364, "grad_norm": 1.7903918027877808, "learning_rate": 7.47104766604159e-06, "loss": 0.7982, "step": 10062 }, { "epoch": 0.35544331071266155, "grad_norm": 1.8369076251983643, "learning_rate": 7.470550377192163e-06, "loss": 0.7772, "step": 10063 }, { "epoch": 0.35547863251636946, "grad_norm": 1.7126052379608154, "learning_rate": 7.470053056008638e-06, "loss": 0.7874, "step": 10064 }, { "epoch": 0.3555139543200774, "grad_norm": 2.0403006076812744, "learning_rate": 7.469555702497523e-06, "loss": 0.8204, "step": 10065 }, { "epoch": 0.3555492761237853, "grad_norm": 1.6296967267990112, "learning_rate": 7.469058316665326e-06, "loss": 0.8038, "step": 10066 }, { "epoch": 0.35558459792749314, "grad_norm": 1.6516923904418945, "learning_rate": 7.46856089851856e-06, "loss": 0.7989, "step": 10067 }, { "epoch": 0.35561991973120105, "grad_norm": 2.1710543632507324, "learning_rate": 7.468063448063732e-06, "loss": 0.828, "step": 10068 }, { "epoch": 0.35565524153490896, "grad_norm": 1.8451849222183228, "learning_rate": 7.467565965307353e-06, "loss": 0.8147, "step": 10069 }, { "epoch": 0.35569056333861687, "grad_norm": 2.1655666828155518, "learning_rate": 7.467068450255936e-06, "loss": 0.8452, "step": 10070 }, { "epoch": 0.3557258851423248, "grad_norm": 1.7595137357711792, "learning_rate": 7.4665709029159885e-06, "loss": 0.8286, "step": 10071 }, { "epoch": 0.3557612069460327, "grad_norm": 1.7170809507369995, "learning_rate": 7.466073323294027e-06, "loss": 0.8331, "step": 10072 }, { "epoch": 0.3557965287497406, "grad_norm": 1.6587265729904175, "learning_rate": 7.46557571139656e-06, "loss": 0.7904, "step": 10073 }, { "epoch": 0.3558318505534485, "grad_norm": 1.6794739961624146, "learning_rate": 7.465078067230102e-06, "loss": 0.8247, "step": 10074 }, { "epoch": 0.3558671723571564, "grad_norm": 1.6429697275161743, "learning_rate": 7.464580390801165e-06, "loss": 0.8147, "step": 10075 }, { "epoch": 0.35590249416086434, "grad_norm": 2.1335716247558594, "learning_rate": 7.464082682116264e-06, "loss": 0.8006, "step": 10076 }, { "epoch": 0.35593781596457225, "grad_norm": 1.5742167234420776, "learning_rate": 7.463584941181912e-06, "loss": 0.7968, "step": 10077 }, { "epoch": 0.35597313776828016, "grad_norm": 1.694357991218567, "learning_rate": 7.463087168004622e-06, "loss": 0.8437, "step": 10078 }, { "epoch": 0.35600845957198807, "grad_norm": 1.7469711303710938, "learning_rate": 7.462589362590911e-06, "loss": 0.8192, "step": 10079 }, { "epoch": 0.3560437813756959, "grad_norm": 1.752434492111206, "learning_rate": 7.462091524947295e-06, "loss": 0.8584, "step": 10080 }, { "epoch": 0.35607910317940383, "grad_norm": 1.5635316371917725, "learning_rate": 7.461593655080287e-06, "loss": 0.7821, "step": 10081 }, { "epoch": 0.35611442498311174, "grad_norm": 1.5833405256271362, "learning_rate": 7.461095752996402e-06, "loss": 0.8669, "step": 10082 }, { "epoch": 0.35614974678681965, "grad_norm": 1.479912281036377, "learning_rate": 7.460597818702161e-06, "loss": 0.777, "step": 10083 }, { "epoch": 0.35618506859052756, "grad_norm": 1.53940749168396, "learning_rate": 7.460099852204077e-06, "loss": 0.803, "step": 10084 }, { "epoch": 0.3562203903942355, "grad_norm": 0.9763912558555603, "learning_rate": 7.459601853508669e-06, "loss": 0.5765, "step": 10085 }, { "epoch": 0.3562557121979434, "grad_norm": 1.851380705833435, "learning_rate": 7.459103822622454e-06, "loss": 0.8051, "step": 10086 }, { "epoch": 0.3562910340016513, "grad_norm": 1.7434145212173462, "learning_rate": 7.45860575955195e-06, "loss": 0.8089, "step": 10087 }, { "epoch": 0.3563263558053592, "grad_norm": 1.7388359308242798, "learning_rate": 7.458107664303676e-06, "loss": 0.8108, "step": 10088 }, { "epoch": 0.3563616776090671, "grad_norm": 1.6214381456375122, "learning_rate": 7.457609536884153e-06, "loss": 0.8208, "step": 10089 }, { "epoch": 0.35639699941277503, "grad_norm": 2.2759830951690674, "learning_rate": 7.457111377299897e-06, "loss": 0.83, "step": 10090 }, { "epoch": 0.35643232121648294, "grad_norm": 1.6850764751434326, "learning_rate": 7.456613185557429e-06, "loss": 0.8257, "step": 10091 }, { "epoch": 0.35646764302019085, "grad_norm": 2.8426501750946045, "learning_rate": 7.456114961663269e-06, "loss": 0.8531, "step": 10092 }, { "epoch": 0.3565029648238987, "grad_norm": 1.5548019409179688, "learning_rate": 7.455616705623938e-06, "loss": 0.8013, "step": 10093 }, { "epoch": 0.3565382866276066, "grad_norm": 1.7441297769546509, "learning_rate": 7.455118417445959e-06, "loss": 0.8144, "step": 10094 }, { "epoch": 0.3565736084313145, "grad_norm": 1.795309066772461, "learning_rate": 7.45462009713585e-06, "loss": 0.8258, "step": 10095 }, { "epoch": 0.35660893023502244, "grad_norm": 1.5823798179626465, "learning_rate": 7.454121744700135e-06, "loss": 0.8582, "step": 10096 }, { "epoch": 0.35664425203873035, "grad_norm": 1.6128278970718384, "learning_rate": 7.453623360145336e-06, "loss": 0.8175, "step": 10097 }, { "epoch": 0.35667957384243826, "grad_norm": 1.6437655687332153, "learning_rate": 7.453124943477976e-06, "loss": 0.8071, "step": 10098 }, { "epoch": 0.35671489564614617, "grad_norm": 1.5434004068374634, "learning_rate": 7.452626494704578e-06, "loss": 0.8103, "step": 10099 }, { "epoch": 0.3567502174498541, "grad_norm": 1.8494348526000977, "learning_rate": 7.452128013831664e-06, "loss": 0.8299, "step": 10100 }, { "epoch": 0.356785539253562, "grad_norm": 1.5765823125839233, "learning_rate": 7.45162950086576e-06, "loss": 0.8244, "step": 10101 }, { "epoch": 0.3568208610572699, "grad_norm": 1.7206296920776367, "learning_rate": 7.451130955813392e-06, "loss": 0.8286, "step": 10102 }, { "epoch": 0.3568561828609778, "grad_norm": 1.9906214475631714, "learning_rate": 7.4506323786810795e-06, "loss": 0.8282, "step": 10103 }, { "epoch": 0.3568915046646857, "grad_norm": 1.5774664878845215, "learning_rate": 7.450133769475354e-06, "loss": 0.8118, "step": 10104 }, { "epoch": 0.35692682646839363, "grad_norm": 1.6400903463363647, "learning_rate": 7.449635128202737e-06, "loss": 0.7974, "step": 10105 }, { "epoch": 0.3569621482721015, "grad_norm": 1.666395902633667, "learning_rate": 7.449136454869755e-06, "loss": 0.7916, "step": 10106 }, { "epoch": 0.3569974700758094, "grad_norm": 1.735679030418396, "learning_rate": 7.448637749482937e-06, "loss": 0.8234, "step": 10107 }, { "epoch": 0.3570327918795173, "grad_norm": 2.4935073852539062, "learning_rate": 7.448139012048808e-06, "loss": 0.843, "step": 10108 }, { "epoch": 0.3570681136832252, "grad_norm": 1.8386447429656982, "learning_rate": 7.447640242573896e-06, "loss": 0.8423, "step": 10109 }, { "epoch": 0.35710343548693313, "grad_norm": 1.6695696115493774, "learning_rate": 7.4471414410647295e-06, "loss": 0.823, "step": 10110 }, { "epoch": 0.35713875729064104, "grad_norm": 1.6484684944152832, "learning_rate": 7.446642607527833e-06, "loss": 0.8386, "step": 10111 }, { "epoch": 0.35717407909434895, "grad_norm": 1.7396241426467896, "learning_rate": 7.44614374196974e-06, "loss": 0.8306, "step": 10112 }, { "epoch": 0.35720940089805686, "grad_norm": 2.2391536235809326, "learning_rate": 7.445644844396976e-06, "loss": 0.8251, "step": 10113 }, { "epoch": 0.35724472270176477, "grad_norm": 1.5680582523345947, "learning_rate": 7.445145914816074e-06, "loss": 0.8365, "step": 10114 }, { "epoch": 0.3572800445054727, "grad_norm": 1.6016868352890015, "learning_rate": 7.444646953233562e-06, "loss": 0.8237, "step": 10115 }, { "epoch": 0.3573153663091806, "grad_norm": 1.8203617334365845, "learning_rate": 7.444147959655968e-06, "loss": 0.8311, "step": 10116 }, { "epoch": 0.3573506881128885, "grad_norm": 1.579490303993225, "learning_rate": 7.443648934089828e-06, "loss": 0.8103, "step": 10117 }, { "epoch": 0.3573860099165964, "grad_norm": 1.639155626296997, "learning_rate": 7.4431498765416674e-06, "loss": 0.7802, "step": 10118 }, { "epoch": 0.3574213317203043, "grad_norm": 2.123030185699463, "learning_rate": 7.442650787018022e-06, "loss": 0.8297, "step": 10119 }, { "epoch": 0.3574566535240122, "grad_norm": 1.5903935432434082, "learning_rate": 7.442151665525422e-06, "loss": 0.8237, "step": 10120 }, { "epoch": 0.3574919753277201, "grad_norm": 1.7413250207901, "learning_rate": 7.4416525120703985e-06, "loss": 0.8341, "step": 10121 }, { "epoch": 0.357527297131428, "grad_norm": 2.036752700805664, "learning_rate": 7.441153326659488e-06, "loss": 0.7935, "step": 10122 }, { "epoch": 0.3575626189351359, "grad_norm": 1.5101268291473389, "learning_rate": 7.440654109299221e-06, "loss": 0.7889, "step": 10123 }, { "epoch": 0.3575979407388438, "grad_norm": 1.7891489267349243, "learning_rate": 7.440154859996131e-06, "loss": 0.8499, "step": 10124 }, { "epoch": 0.35763326254255173, "grad_norm": 1.7772053480148315, "learning_rate": 7.439655578756753e-06, "loss": 0.8371, "step": 10125 }, { "epoch": 0.35766858434625964, "grad_norm": 1.6364864110946655, "learning_rate": 7.439156265587624e-06, "loss": 0.8338, "step": 10126 }, { "epoch": 0.35770390614996755, "grad_norm": 1.5215013027191162, "learning_rate": 7.438656920495273e-06, "loss": 0.816, "step": 10127 }, { "epoch": 0.35773922795367546, "grad_norm": 1.6261388063430786, "learning_rate": 7.438157543486241e-06, "loss": 0.8449, "step": 10128 }, { "epoch": 0.3577745497573834, "grad_norm": 1.6970791816711426, "learning_rate": 7.43765813456706e-06, "loss": 0.8385, "step": 10129 }, { "epoch": 0.3578098715610913, "grad_norm": 1.7305984497070312, "learning_rate": 7.437158693744267e-06, "loss": 0.7944, "step": 10130 }, { "epoch": 0.3578451933647992, "grad_norm": 1.6179406642913818, "learning_rate": 7.4366592210244e-06, "loss": 0.7977, "step": 10131 }, { "epoch": 0.3578805151685071, "grad_norm": 1.6200571060180664, "learning_rate": 7.436159716413995e-06, "loss": 0.8356, "step": 10132 }, { "epoch": 0.35791583697221496, "grad_norm": 5.7282538414001465, "learning_rate": 7.43566017991959e-06, "loss": 0.8749, "step": 10133 }, { "epoch": 0.3579511587759229, "grad_norm": 1.717743992805481, "learning_rate": 7.435160611547723e-06, "loss": 0.8212, "step": 10134 }, { "epoch": 0.3579864805796308, "grad_norm": 1.7046605348587036, "learning_rate": 7.434661011304931e-06, "loss": 0.7959, "step": 10135 }, { "epoch": 0.3580218023833387, "grad_norm": 1.7228331565856934, "learning_rate": 7.434161379197753e-06, "loss": 0.8135, "step": 10136 }, { "epoch": 0.3580571241870466, "grad_norm": 1.6803065538406372, "learning_rate": 7.433661715232728e-06, "loss": 0.8129, "step": 10137 }, { "epoch": 0.3580924459907545, "grad_norm": 1.8137212991714478, "learning_rate": 7.433162019416398e-06, "loss": 0.8203, "step": 10138 }, { "epoch": 0.3581277677944624, "grad_norm": 1.7658189535140991, "learning_rate": 7.432662291755299e-06, "loss": 0.8088, "step": 10139 }, { "epoch": 0.35816308959817034, "grad_norm": 1.7749481201171875, "learning_rate": 7.432162532255973e-06, "loss": 0.8503, "step": 10140 }, { "epoch": 0.35819841140187825, "grad_norm": 2.381326913833618, "learning_rate": 7.431662740924962e-06, "loss": 0.8301, "step": 10141 }, { "epoch": 0.35823373320558616, "grad_norm": 1.6236650943756104, "learning_rate": 7.4311629177688045e-06, "loss": 0.7886, "step": 10142 }, { "epoch": 0.35826905500929407, "grad_norm": 1.6216026544570923, "learning_rate": 7.430663062794046e-06, "loss": 0.813, "step": 10143 }, { "epoch": 0.358304376813002, "grad_norm": 1.7476545572280884, "learning_rate": 7.430163176007226e-06, "loss": 0.7834, "step": 10144 }, { "epoch": 0.3583396986167099, "grad_norm": 1.7392301559448242, "learning_rate": 7.429663257414886e-06, "loss": 0.8163, "step": 10145 }, { "epoch": 0.35837502042041774, "grad_norm": 1.032819390296936, "learning_rate": 7.42916330702357e-06, "loss": 0.6107, "step": 10146 }, { "epoch": 0.35841034222412566, "grad_norm": 2.016160011291504, "learning_rate": 7.428663324839822e-06, "loss": 0.8402, "step": 10147 }, { "epoch": 0.35844566402783357, "grad_norm": 1.8503400087356567, "learning_rate": 7.4281633108701845e-06, "loss": 0.8035, "step": 10148 }, { "epoch": 0.3584809858315415, "grad_norm": 2.0379796028137207, "learning_rate": 7.427663265121203e-06, "loss": 0.7924, "step": 10149 }, { "epoch": 0.3585163076352494, "grad_norm": 1.8093152046203613, "learning_rate": 7.42716318759942e-06, "loss": 0.8184, "step": 10150 }, { "epoch": 0.3585516294389573, "grad_norm": 1.7754104137420654, "learning_rate": 7.426663078311382e-06, "loss": 0.8522, "step": 10151 }, { "epoch": 0.3585869512426652, "grad_norm": 1.9558675289154053, "learning_rate": 7.426162937263634e-06, "loss": 0.8594, "step": 10152 }, { "epoch": 0.3586222730463731, "grad_norm": 1.7249046564102173, "learning_rate": 7.42566276446272e-06, "loss": 0.8042, "step": 10153 }, { "epoch": 0.35865759485008103, "grad_norm": 1.7262181043624878, "learning_rate": 7.4251625599151885e-06, "loss": 0.8237, "step": 10154 }, { "epoch": 0.35869291665378894, "grad_norm": 1.713397741317749, "learning_rate": 7.424662323627586e-06, "loss": 0.8458, "step": 10155 }, { "epoch": 0.35872823845749685, "grad_norm": 1.6891487836837769, "learning_rate": 7.424162055606459e-06, "loss": 0.8065, "step": 10156 }, { "epoch": 0.35876356026120476, "grad_norm": 1.9992460012435913, "learning_rate": 7.423661755858354e-06, "loss": 0.8053, "step": 10157 }, { "epoch": 0.35879888206491267, "grad_norm": 1.8401635885238647, "learning_rate": 7.423161424389818e-06, "loss": 0.8264, "step": 10158 }, { "epoch": 0.3588342038686205, "grad_norm": 1.6314177513122559, "learning_rate": 7.422661061207404e-06, "loss": 0.7747, "step": 10159 }, { "epoch": 0.35886952567232844, "grad_norm": 1.7813767194747925, "learning_rate": 7.422160666317655e-06, "loss": 0.7712, "step": 10160 }, { "epoch": 0.35890484747603635, "grad_norm": 2.3066577911376953, "learning_rate": 7.4216602397271235e-06, "loss": 0.8347, "step": 10161 }, { "epoch": 0.35894016927974426, "grad_norm": 1.6206706762313843, "learning_rate": 7.421159781442358e-06, "loss": 0.798, "step": 10162 }, { "epoch": 0.35897549108345217, "grad_norm": 1.666783094406128, "learning_rate": 7.420659291469908e-06, "loss": 0.8405, "step": 10163 }, { "epoch": 0.3590108128871601, "grad_norm": 1.7983896732330322, "learning_rate": 7.420158769816326e-06, "loss": 0.8444, "step": 10164 }, { "epoch": 0.359046134690868, "grad_norm": 1.662665605545044, "learning_rate": 7.419658216488159e-06, "loss": 0.8392, "step": 10165 }, { "epoch": 0.3590814564945759, "grad_norm": 1.7676312923431396, "learning_rate": 7.41915763149196e-06, "loss": 0.8009, "step": 10166 }, { "epoch": 0.3591167782982838, "grad_norm": 1.603226900100708, "learning_rate": 7.418657014834281e-06, "loss": 0.8322, "step": 10167 }, { "epoch": 0.3591521001019917, "grad_norm": 1.7046421766281128, "learning_rate": 7.418156366521674e-06, "loss": 0.8219, "step": 10168 }, { "epoch": 0.35918742190569963, "grad_norm": 1.633968472480774, "learning_rate": 7.417655686560691e-06, "loss": 0.8437, "step": 10169 }, { "epoch": 0.35922274370940754, "grad_norm": 2.039820909500122, "learning_rate": 7.417154974957885e-06, "loss": 0.8123, "step": 10170 }, { "epoch": 0.35925806551311545, "grad_norm": 1.5963667631149292, "learning_rate": 7.416654231719808e-06, "loss": 0.8073, "step": 10171 }, { "epoch": 0.3592933873168233, "grad_norm": 1.7284551858901978, "learning_rate": 7.416153456853017e-06, "loss": 0.8305, "step": 10172 }, { "epoch": 0.3593287091205312, "grad_norm": 1.555470585823059, "learning_rate": 7.415652650364062e-06, "loss": 0.812, "step": 10173 }, { "epoch": 0.35936403092423913, "grad_norm": 1.7229344844818115, "learning_rate": 7.4151518122594986e-06, "loss": 0.8518, "step": 10174 }, { "epoch": 0.35939935272794704, "grad_norm": 1.849677562713623, "learning_rate": 7.4146509425458836e-06, "loss": 0.8095, "step": 10175 }, { "epoch": 0.35943467453165495, "grad_norm": 1.6444917917251587, "learning_rate": 7.414150041229769e-06, "loss": 0.839, "step": 10176 }, { "epoch": 0.35946999633536286, "grad_norm": 1.7287429571151733, "learning_rate": 7.413649108317716e-06, "loss": 0.8024, "step": 10177 }, { "epoch": 0.3595053181390708, "grad_norm": 1.471626877784729, "learning_rate": 7.413148143816275e-06, "loss": 0.7956, "step": 10178 }, { "epoch": 0.3595406399427787, "grad_norm": 1.6993211507797241, "learning_rate": 7.412647147732004e-06, "loss": 0.8313, "step": 10179 }, { "epoch": 0.3595759617464866, "grad_norm": 1.5909618139266968, "learning_rate": 7.412146120071462e-06, "loss": 0.8164, "step": 10180 }, { "epoch": 0.3596112835501945, "grad_norm": 1.5794190168380737, "learning_rate": 7.4116450608412035e-06, "loss": 0.7901, "step": 10181 }, { "epoch": 0.3596466053539024, "grad_norm": 1.7901065349578857, "learning_rate": 7.41114397004779e-06, "loss": 0.8266, "step": 10182 }, { "epoch": 0.3596819271576103, "grad_norm": 1.7285056114196777, "learning_rate": 7.410642847697776e-06, "loss": 0.8011, "step": 10183 }, { "epoch": 0.35971724896131824, "grad_norm": 1.6555103063583374, "learning_rate": 7.410141693797722e-06, "loss": 0.8406, "step": 10184 }, { "epoch": 0.3597525707650261, "grad_norm": 1.560627818107605, "learning_rate": 7.409640508354187e-06, "loss": 0.8304, "step": 10185 }, { "epoch": 0.359787892568734, "grad_norm": 1.6028536558151245, "learning_rate": 7.409139291373729e-06, "loss": 0.7972, "step": 10186 }, { "epoch": 0.3598232143724419, "grad_norm": 1.5348491668701172, "learning_rate": 7.408638042862909e-06, "loss": 0.7775, "step": 10187 }, { "epoch": 0.3598585361761498, "grad_norm": 1.6742782592773438, "learning_rate": 7.408136762828288e-06, "loss": 0.8545, "step": 10188 }, { "epoch": 0.35989385797985773, "grad_norm": 1.5732433795928955, "learning_rate": 7.407635451276425e-06, "loss": 0.8219, "step": 10189 }, { "epoch": 0.35992917978356564, "grad_norm": 1.5062857866287231, "learning_rate": 7.407134108213881e-06, "loss": 0.8076, "step": 10190 }, { "epoch": 0.35996450158727356, "grad_norm": 1.5332932472229004, "learning_rate": 7.40663273364722e-06, "loss": 0.8188, "step": 10191 }, { "epoch": 0.35999982339098147, "grad_norm": 1.5762051343917847, "learning_rate": 7.406131327583001e-06, "loss": 0.8059, "step": 10192 }, { "epoch": 0.3600351451946894, "grad_norm": 1.7128351926803589, "learning_rate": 7.405629890027788e-06, "loss": 0.8173, "step": 10193 }, { "epoch": 0.3600704669983973, "grad_norm": 1.7705934047698975, "learning_rate": 7.405128420988143e-06, "loss": 0.8029, "step": 10194 }, { "epoch": 0.3601057888021052, "grad_norm": 1.6485986709594727, "learning_rate": 7.404626920470629e-06, "loss": 0.8268, "step": 10195 }, { "epoch": 0.3601411106058131, "grad_norm": 1.6946865320205688, "learning_rate": 7.404125388481811e-06, "loss": 0.833, "step": 10196 }, { "epoch": 0.360176432409521, "grad_norm": 1.4636971950531006, "learning_rate": 7.403623825028251e-06, "loss": 0.7909, "step": 10197 }, { "epoch": 0.3602117542132289, "grad_norm": 1.9914038181304932, "learning_rate": 7.403122230116513e-06, "loss": 0.8197, "step": 10198 }, { "epoch": 0.3602470760169368, "grad_norm": 1.6591235399246216, "learning_rate": 7.402620603753164e-06, "loss": 0.8016, "step": 10199 }, { "epoch": 0.3602823978206447, "grad_norm": 1.7893701791763306, "learning_rate": 7.40211894594477e-06, "loss": 0.8196, "step": 10200 }, { "epoch": 0.3603177196243526, "grad_norm": 1.5835429430007935, "learning_rate": 7.401617256697893e-06, "loss": 0.8203, "step": 10201 }, { "epoch": 0.3603530414280605, "grad_norm": 1.7034752368927002, "learning_rate": 7.401115536019101e-06, "loss": 0.8126, "step": 10202 }, { "epoch": 0.3603883632317684, "grad_norm": 1.8981391191482544, "learning_rate": 7.40061378391496e-06, "loss": 0.8121, "step": 10203 }, { "epoch": 0.36042368503547634, "grad_norm": 1.682388424873352, "learning_rate": 7.400112000392038e-06, "loss": 0.8037, "step": 10204 }, { "epoch": 0.36045900683918425, "grad_norm": 1.7199684381484985, "learning_rate": 7.399610185456899e-06, "loss": 0.8614, "step": 10205 }, { "epoch": 0.36049432864289216, "grad_norm": 1.5927009582519531, "learning_rate": 7.399108339116115e-06, "loss": 0.8645, "step": 10206 }, { "epoch": 0.36052965044660007, "grad_norm": 1.5810890197753906, "learning_rate": 7.398606461376252e-06, "loss": 0.8243, "step": 10207 }, { "epoch": 0.360564972250308, "grad_norm": 1.6640669107437134, "learning_rate": 7.398104552243878e-06, "loss": 0.8088, "step": 10208 }, { "epoch": 0.3606002940540159, "grad_norm": 1.5437638759613037, "learning_rate": 7.397602611725564e-06, "loss": 0.7986, "step": 10209 }, { "epoch": 0.3606356158577238, "grad_norm": 1.6286633014678955, "learning_rate": 7.397100639827876e-06, "loss": 0.7875, "step": 10210 }, { "epoch": 0.36067093766143166, "grad_norm": 1.661919355392456, "learning_rate": 7.396598636557385e-06, "loss": 0.7975, "step": 10211 }, { "epoch": 0.36070625946513957, "grad_norm": 1.5059994459152222, "learning_rate": 7.396096601920662e-06, "loss": 0.8103, "step": 10212 }, { "epoch": 0.3607415812688475, "grad_norm": 1.816206455230713, "learning_rate": 7.3955945359242774e-06, "loss": 0.8151, "step": 10213 }, { "epoch": 0.3607769030725554, "grad_norm": 1.7042138576507568, "learning_rate": 7.395092438574802e-06, "loss": 0.8061, "step": 10214 }, { "epoch": 0.3608122248762633, "grad_norm": 1.581494688987732, "learning_rate": 7.3945903098788075e-06, "loss": 0.8158, "step": 10215 }, { "epoch": 0.3608475466799712, "grad_norm": 1.760806679725647, "learning_rate": 7.394088149842864e-06, "loss": 0.8052, "step": 10216 }, { "epoch": 0.3608828684836791, "grad_norm": 1.9609706401824951, "learning_rate": 7.393585958473545e-06, "loss": 0.7914, "step": 10217 }, { "epoch": 0.36091819028738703, "grad_norm": 1.5226781368255615, "learning_rate": 7.393083735777424e-06, "loss": 0.8072, "step": 10218 }, { "epoch": 0.36095351209109494, "grad_norm": 1.5127533674240112, "learning_rate": 7.392581481761072e-06, "loss": 0.8131, "step": 10219 }, { "epoch": 0.36098883389480285, "grad_norm": 6.013857364654541, "learning_rate": 7.392079196431064e-06, "loss": 0.8531, "step": 10220 }, { "epoch": 0.36102415569851076, "grad_norm": 1.5788321495056152, "learning_rate": 7.391576879793972e-06, "loss": 0.7993, "step": 10221 }, { "epoch": 0.3610594775022187, "grad_norm": 2.0844364166259766, "learning_rate": 7.391074531856373e-06, "loss": 0.8356, "step": 10222 }, { "epoch": 0.3610947993059266, "grad_norm": 1.9504406452178955, "learning_rate": 7.390572152624838e-06, "loss": 0.8199, "step": 10223 }, { "epoch": 0.36113012110963444, "grad_norm": 1.7261784076690674, "learning_rate": 7.390069742105944e-06, "loss": 0.8298, "step": 10224 }, { "epoch": 0.36116544291334235, "grad_norm": 1.6802579164505005, "learning_rate": 7.389567300306268e-06, "loss": 0.8311, "step": 10225 }, { "epoch": 0.36120076471705026, "grad_norm": 1.589421033859253, "learning_rate": 7.389064827232384e-06, "loss": 0.8082, "step": 10226 }, { "epoch": 0.36123608652075817, "grad_norm": 1.7679773569107056, "learning_rate": 7.388562322890869e-06, "loss": 0.8342, "step": 10227 }, { "epoch": 0.3612714083244661, "grad_norm": 1.7828844785690308, "learning_rate": 7.3880597872883e-06, "loss": 0.7959, "step": 10228 }, { "epoch": 0.361306730128174, "grad_norm": 0.978649377822876, "learning_rate": 7.38755722043125e-06, "loss": 0.5775, "step": 10229 }, { "epoch": 0.3613420519318819, "grad_norm": 1.7093799114227295, "learning_rate": 7.387054622326304e-06, "loss": 0.8138, "step": 10230 }, { "epoch": 0.3613773737355898, "grad_norm": 2.022230863571167, "learning_rate": 7.386551992980033e-06, "loss": 0.8335, "step": 10231 }, { "epoch": 0.3614126955392977, "grad_norm": 1.9149564504623413, "learning_rate": 7.386049332399019e-06, "loss": 0.8497, "step": 10232 }, { "epoch": 0.36144801734300563, "grad_norm": 1.8173850774765015, "learning_rate": 7.38554664058984e-06, "loss": 0.8023, "step": 10233 }, { "epoch": 0.36148333914671354, "grad_norm": 1.571847677230835, "learning_rate": 7.385043917559075e-06, "loss": 0.7949, "step": 10234 }, { "epoch": 0.36151866095042146, "grad_norm": 1.9281001091003418, "learning_rate": 7.384541163313304e-06, "loss": 0.8316, "step": 10235 }, { "epoch": 0.36155398275412937, "grad_norm": 1.7746429443359375, "learning_rate": 7.384038377859106e-06, "loss": 0.8222, "step": 10236 }, { "epoch": 0.3615893045578372, "grad_norm": 1.7006810903549194, "learning_rate": 7.383535561203061e-06, "loss": 0.8066, "step": 10237 }, { "epoch": 0.36162462636154513, "grad_norm": 1.7486356496810913, "learning_rate": 7.383032713351752e-06, "loss": 0.8008, "step": 10238 }, { "epoch": 0.36165994816525304, "grad_norm": 2.655083179473877, "learning_rate": 7.382529834311758e-06, "loss": 0.8044, "step": 10239 }, { "epoch": 0.36169526996896095, "grad_norm": 1.9746073484420776, "learning_rate": 7.382026924089662e-06, "loss": 0.8308, "step": 10240 }, { "epoch": 0.36173059177266886, "grad_norm": 1.715388536453247, "learning_rate": 7.381523982692046e-06, "loss": 0.8246, "step": 10241 }, { "epoch": 0.3617659135763768, "grad_norm": 1.59034264087677, "learning_rate": 7.3810210101254895e-06, "loss": 0.8114, "step": 10242 }, { "epoch": 0.3618012353800847, "grad_norm": 1.5501550436019897, "learning_rate": 7.38051800639658e-06, "loss": 0.8024, "step": 10243 }, { "epoch": 0.3618365571837926, "grad_norm": 1.8504518270492554, "learning_rate": 7.3800149715118974e-06, "loss": 0.8319, "step": 10244 }, { "epoch": 0.3618718789875005, "grad_norm": 1.7599427700042725, "learning_rate": 7.379511905478025e-06, "loss": 0.7968, "step": 10245 }, { "epoch": 0.3619072007912084, "grad_norm": 1.840314507484436, "learning_rate": 7.379008808301551e-06, "loss": 0.869, "step": 10246 }, { "epoch": 0.3619425225949163, "grad_norm": 1.7399373054504395, "learning_rate": 7.378505679989054e-06, "loss": 0.8049, "step": 10247 }, { "epoch": 0.36197784439862424, "grad_norm": 1.5653280019760132, "learning_rate": 7.3780025205471225e-06, "loss": 0.7999, "step": 10248 }, { "epoch": 0.36201316620233215, "grad_norm": 1.766636848449707, "learning_rate": 7.377499329982343e-06, "loss": 0.8258, "step": 10249 }, { "epoch": 0.36204848800604, "grad_norm": 1.6009050607681274, "learning_rate": 7.376996108301296e-06, "loss": 0.7872, "step": 10250 }, { "epoch": 0.3620838098097479, "grad_norm": 1.6849644184112549, "learning_rate": 7.376492855510573e-06, "loss": 0.8101, "step": 10251 }, { "epoch": 0.3621191316134558, "grad_norm": 1.725738763809204, "learning_rate": 7.375989571616758e-06, "loss": 0.8134, "step": 10252 }, { "epoch": 0.36215445341716374, "grad_norm": 2.1537325382232666, "learning_rate": 7.375486256626438e-06, "loss": 0.7988, "step": 10253 }, { "epoch": 0.36218977522087165, "grad_norm": 1.6108641624450684, "learning_rate": 7.3749829105462005e-06, "loss": 0.7667, "step": 10254 }, { "epoch": 0.36222509702457956, "grad_norm": 1.5970447063446045, "learning_rate": 7.374479533382632e-06, "loss": 0.7809, "step": 10255 }, { "epoch": 0.36226041882828747, "grad_norm": 1.6510519981384277, "learning_rate": 7.373976125142323e-06, "loss": 0.8345, "step": 10256 }, { "epoch": 0.3622957406319954, "grad_norm": 2.619521141052246, "learning_rate": 7.373472685831861e-06, "loss": 0.8078, "step": 10257 }, { "epoch": 0.3623310624357033, "grad_norm": 1.687705397605896, "learning_rate": 7.3729692154578325e-06, "loss": 0.8154, "step": 10258 }, { "epoch": 0.3623663842394112, "grad_norm": 1.9127483367919922, "learning_rate": 7.372465714026831e-06, "loss": 0.8541, "step": 10259 }, { "epoch": 0.3624017060431191, "grad_norm": 1.7498726844787598, "learning_rate": 7.371962181545444e-06, "loss": 0.7958, "step": 10260 }, { "epoch": 0.362437027846827, "grad_norm": 1.6358736753463745, "learning_rate": 7.371458618020261e-06, "loss": 0.8185, "step": 10261 }, { "epoch": 0.36247234965053493, "grad_norm": 1.0631366968154907, "learning_rate": 7.370955023457874e-06, "loss": 0.6107, "step": 10262 }, { "epoch": 0.3625076714542428, "grad_norm": 1.890141248703003, "learning_rate": 7.370451397864873e-06, "loss": 0.8051, "step": 10263 }, { "epoch": 0.3625429932579507, "grad_norm": 1.6521228551864624, "learning_rate": 7.36994774124785e-06, "loss": 0.7974, "step": 10264 }, { "epoch": 0.3625783150616586, "grad_norm": 1.5660337209701538, "learning_rate": 7.369444053613397e-06, "loss": 0.8168, "step": 10265 }, { "epoch": 0.3626136368653665, "grad_norm": 1.8749021291732788, "learning_rate": 7.368940334968104e-06, "loss": 0.7969, "step": 10266 }, { "epoch": 0.36264895866907443, "grad_norm": 1.8036442995071411, "learning_rate": 7.368436585318567e-06, "loss": 0.7967, "step": 10267 }, { "epoch": 0.36268428047278234, "grad_norm": 1.8008029460906982, "learning_rate": 7.367932804671376e-06, "loss": 0.788, "step": 10268 }, { "epoch": 0.36271960227649025, "grad_norm": 1.696512222290039, "learning_rate": 7.367428993033125e-06, "loss": 0.7981, "step": 10269 }, { "epoch": 0.36275492408019816, "grad_norm": 1.5354316234588623, "learning_rate": 7.366925150410411e-06, "loss": 0.842, "step": 10270 }, { "epoch": 0.36279024588390607, "grad_norm": 1.6173814535140991, "learning_rate": 7.366421276809823e-06, "loss": 0.8136, "step": 10271 }, { "epoch": 0.362825567687614, "grad_norm": 1.8574366569519043, "learning_rate": 7.365917372237959e-06, "loss": 0.8138, "step": 10272 }, { "epoch": 0.3628608894913219, "grad_norm": 1.592328667640686, "learning_rate": 7.365413436701414e-06, "loss": 0.799, "step": 10273 }, { "epoch": 0.3628962112950298, "grad_norm": 2.279013156890869, "learning_rate": 7.364909470206781e-06, "loss": 0.8179, "step": 10274 }, { "epoch": 0.3629315330987377, "grad_norm": 2.1151700019836426, "learning_rate": 7.364405472760658e-06, "loss": 0.826, "step": 10275 }, { "epoch": 0.36296685490244557, "grad_norm": 1.5759788751602173, "learning_rate": 7.3639014443696414e-06, "loss": 0.7958, "step": 10276 }, { "epoch": 0.3630021767061535, "grad_norm": 1.7658663988113403, "learning_rate": 7.363397385040327e-06, "loss": 0.8246, "step": 10277 }, { "epoch": 0.3630374985098614, "grad_norm": 1.7628145217895508, "learning_rate": 7.362893294779312e-06, "loss": 0.8218, "step": 10278 }, { "epoch": 0.3630728203135693, "grad_norm": 1.542012333869934, "learning_rate": 7.362389173593192e-06, "loss": 0.8066, "step": 10279 }, { "epoch": 0.3631081421172772, "grad_norm": 1.6818605661392212, "learning_rate": 7.361885021488568e-06, "loss": 0.8386, "step": 10280 }, { "epoch": 0.3631434639209851, "grad_norm": 1.4489413499832153, "learning_rate": 7.361380838472037e-06, "loss": 0.7882, "step": 10281 }, { "epoch": 0.36317878572469303, "grad_norm": 1.9021505117416382, "learning_rate": 7.360876624550196e-06, "loss": 0.8046, "step": 10282 }, { "epoch": 0.36321410752840094, "grad_norm": 2.081681728363037, "learning_rate": 7.3603723797296475e-06, "loss": 0.8328, "step": 10283 }, { "epoch": 0.36324942933210885, "grad_norm": 1.5919071435928345, "learning_rate": 7.359868104016987e-06, "loss": 0.8262, "step": 10284 }, { "epoch": 0.36328475113581676, "grad_norm": 1.8038177490234375, "learning_rate": 7.359363797418818e-06, "loss": 0.82, "step": 10285 }, { "epoch": 0.3633200729395247, "grad_norm": 1.7338576316833496, "learning_rate": 7.358859459941739e-06, "loss": 0.81, "step": 10286 }, { "epoch": 0.3633553947432326, "grad_norm": 1.5657566785812378, "learning_rate": 7.35835509159235e-06, "loss": 0.8122, "step": 10287 }, { "epoch": 0.3633907165469405, "grad_norm": 1.8106335401535034, "learning_rate": 7.357850692377253e-06, "loss": 0.8308, "step": 10288 }, { "epoch": 0.36342603835064835, "grad_norm": 1.6729567050933838, "learning_rate": 7.35734626230305e-06, "loss": 0.808, "step": 10289 }, { "epoch": 0.36346136015435626, "grad_norm": 1.8180915117263794, "learning_rate": 7.356841801376342e-06, "loss": 0.7809, "step": 10290 }, { "epoch": 0.36349668195806417, "grad_norm": 1.8288803100585938, "learning_rate": 7.356337309603732e-06, "loss": 0.8004, "step": 10291 }, { "epoch": 0.3635320037617721, "grad_norm": 1.8655582666397095, "learning_rate": 7.3558327869918215e-06, "loss": 0.8656, "step": 10292 }, { "epoch": 0.36356732556548, "grad_norm": 1.662820816040039, "learning_rate": 7.3553282335472146e-06, "loss": 0.8261, "step": 10293 }, { "epoch": 0.3636026473691879, "grad_norm": 1.6208523511886597, "learning_rate": 7.354823649276515e-06, "loss": 0.7783, "step": 10294 }, { "epoch": 0.3636379691728958, "grad_norm": 1.6177033185958862, "learning_rate": 7.354319034186324e-06, "loss": 0.8116, "step": 10295 }, { "epoch": 0.3636732909766037, "grad_norm": 1.6067891120910645, "learning_rate": 7.35381438828325e-06, "loss": 0.8172, "step": 10296 }, { "epoch": 0.36370861278031164, "grad_norm": 1.6724172830581665, "learning_rate": 7.353309711573895e-06, "loss": 0.8327, "step": 10297 }, { "epoch": 0.36374393458401955, "grad_norm": 1.7147022485733032, "learning_rate": 7.352805004064865e-06, "loss": 0.7874, "step": 10298 }, { "epoch": 0.36377925638772746, "grad_norm": 2.044090747833252, "learning_rate": 7.352300265762766e-06, "loss": 0.8653, "step": 10299 }, { "epoch": 0.36381457819143537, "grad_norm": 1.6580510139465332, "learning_rate": 7.351795496674202e-06, "loss": 0.8149, "step": 10300 }, { "epoch": 0.3638498999951433, "grad_norm": 1.5916944742202759, "learning_rate": 7.351290696805783e-06, "loss": 0.7965, "step": 10301 }, { "epoch": 0.36388522179885113, "grad_norm": 1.5115830898284912, "learning_rate": 7.350785866164112e-06, "loss": 0.807, "step": 10302 }, { "epoch": 0.36392054360255904, "grad_norm": 1.700020670890808, "learning_rate": 7.350281004755797e-06, "loss": 0.7662, "step": 10303 }, { "epoch": 0.36395586540626695, "grad_norm": 1.8753173351287842, "learning_rate": 7.349776112587447e-06, "loss": 0.8032, "step": 10304 }, { "epoch": 0.36399118720997486, "grad_norm": 1.5831838846206665, "learning_rate": 7.349271189665668e-06, "loss": 0.8272, "step": 10305 }, { "epoch": 0.3640265090136828, "grad_norm": 1.5313276052474976, "learning_rate": 7.348766235997069e-06, "loss": 0.8129, "step": 10306 }, { "epoch": 0.3640618308173907, "grad_norm": 1.6039459705352783, "learning_rate": 7.34826125158826e-06, "loss": 0.8576, "step": 10307 }, { "epoch": 0.3640971526210986, "grad_norm": 1.6639292240142822, "learning_rate": 7.347756236445848e-06, "loss": 0.8164, "step": 10308 }, { "epoch": 0.3641324744248065, "grad_norm": 4.841073036193848, "learning_rate": 7.347251190576445e-06, "loss": 0.8041, "step": 10309 }, { "epoch": 0.3641677962285144, "grad_norm": 1.9486944675445557, "learning_rate": 7.346746113986659e-06, "loss": 0.8291, "step": 10310 }, { "epoch": 0.36420311803222233, "grad_norm": 1.7316666841506958, "learning_rate": 7.3462410066831015e-06, "loss": 0.8666, "step": 10311 }, { "epoch": 0.36423843983593024, "grad_norm": 1.5052911043167114, "learning_rate": 7.345735868672383e-06, "loss": 0.7935, "step": 10312 }, { "epoch": 0.36427376163963815, "grad_norm": 1.5558528900146484, "learning_rate": 7.345230699961112e-06, "loss": 0.8146, "step": 10313 }, { "epoch": 0.36430908344334606, "grad_norm": 1.6872329711914062, "learning_rate": 7.344725500555905e-06, "loss": 0.8177, "step": 10314 }, { "epoch": 0.3643444052470539, "grad_norm": 1.7612931728363037, "learning_rate": 7.344220270463371e-06, "loss": 0.8444, "step": 10315 }, { "epoch": 0.3643797270507618, "grad_norm": 1.6438502073287964, "learning_rate": 7.343715009690122e-06, "loss": 0.8267, "step": 10316 }, { "epoch": 0.36441504885446974, "grad_norm": 1.566955327987671, "learning_rate": 7.343209718242772e-06, "loss": 0.8092, "step": 10317 }, { "epoch": 0.36445037065817765, "grad_norm": 1.817708969116211, "learning_rate": 7.3427043961279345e-06, "loss": 0.8237, "step": 10318 }, { "epoch": 0.36448569246188556, "grad_norm": 1.7558327913284302, "learning_rate": 7.342199043352221e-06, "loss": 0.8512, "step": 10319 }, { "epoch": 0.36452101426559347, "grad_norm": 1.7136492729187012, "learning_rate": 7.341693659922247e-06, "loss": 0.8382, "step": 10320 }, { "epoch": 0.3645563360693014, "grad_norm": 1.5854768753051758, "learning_rate": 7.341188245844627e-06, "loss": 0.7657, "step": 10321 }, { "epoch": 0.3645916578730093, "grad_norm": 1.6501072645187378, "learning_rate": 7.3406828011259755e-06, "loss": 0.8029, "step": 10322 }, { "epoch": 0.3646269796767172, "grad_norm": 1.7477295398712158, "learning_rate": 7.340177325772907e-06, "loss": 0.829, "step": 10323 }, { "epoch": 0.3646623014804251, "grad_norm": 1.646541714668274, "learning_rate": 7.3396718197920365e-06, "loss": 0.8318, "step": 10324 }, { "epoch": 0.364697623284133, "grad_norm": 1.7064054012298584, "learning_rate": 7.3391662831899825e-06, "loss": 0.8199, "step": 10325 }, { "epoch": 0.36473294508784093, "grad_norm": 1.677079677581787, "learning_rate": 7.338660715973359e-06, "loss": 0.8132, "step": 10326 }, { "epoch": 0.36476826689154884, "grad_norm": 1.003180742263794, "learning_rate": 7.338155118148784e-06, "loss": 0.6138, "step": 10327 }, { "epoch": 0.3648035886952567, "grad_norm": 1.6120233535766602, "learning_rate": 7.337649489722876e-06, "loss": 0.8183, "step": 10328 }, { "epoch": 0.3648389104989646, "grad_norm": 1.5388708114624023, "learning_rate": 7.337143830702249e-06, "loss": 0.832, "step": 10329 }, { "epoch": 0.3648742323026725, "grad_norm": 1.6828038692474365, "learning_rate": 7.336638141093524e-06, "loss": 0.7839, "step": 10330 }, { "epoch": 0.36490955410638043, "grad_norm": 2.8801512718200684, "learning_rate": 7.336132420903316e-06, "loss": 0.7724, "step": 10331 }, { "epoch": 0.36494487591008834, "grad_norm": 1.7608256340026855, "learning_rate": 7.3356266701382486e-06, "loss": 0.83, "step": 10332 }, { "epoch": 0.36498019771379625, "grad_norm": 1.6388368606567383, "learning_rate": 7.335120888804938e-06, "loss": 0.8027, "step": 10333 }, { "epoch": 0.36501551951750416, "grad_norm": 1.850657343864441, "learning_rate": 7.334615076910002e-06, "loss": 0.8055, "step": 10334 }, { "epoch": 0.36505084132121207, "grad_norm": 2.0827152729034424, "learning_rate": 7.334109234460065e-06, "loss": 0.8065, "step": 10335 }, { "epoch": 0.36508616312492, "grad_norm": 1.8409525156021118, "learning_rate": 7.333603361461744e-06, "loss": 0.8287, "step": 10336 }, { "epoch": 0.3651214849286279, "grad_norm": 1.5983314514160156, "learning_rate": 7.3330974579216605e-06, "loss": 0.8164, "step": 10337 }, { "epoch": 0.3651568067323358, "grad_norm": 1.6348342895507812, "learning_rate": 7.332591523846438e-06, "loss": 0.7875, "step": 10338 }, { "epoch": 0.3651921285360437, "grad_norm": 1.7225435972213745, "learning_rate": 7.332085559242694e-06, "loss": 0.8098, "step": 10339 }, { "epoch": 0.3652274503397516, "grad_norm": 1.9655789136886597, "learning_rate": 7.331579564117054e-06, "loss": 0.8087, "step": 10340 }, { "epoch": 0.3652627721434595, "grad_norm": 1.5220417976379395, "learning_rate": 7.331073538476139e-06, "loss": 0.8131, "step": 10341 }, { "epoch": 0.3652980939471674, "grad_norm": 1.7917882204055786, "learning_rate": 7.33056748232657e-06, "loss": 0.8215, "step": 10342 }, { "epoch": 0.3653334157508753, "grad_norm": 1.4998866319656372, "learning_rate": 7.330061395674974e-06, "loss": 0.7881, "step": 10343 }, { "epoch": 0.3653687375545832, "grad_norm": 1.59779691696167, "learning_rate": 7.329555278527971e-06, "loss": 0.8389, "step": 10344 }, { "epoch": 0.3654040593582911, "grad_norm": 1.7884877920150757, "learning_rate": 7.329049130892186e-06, "loss": 0.8028, "step": 10345 }, { "epoch": 0.36543938116199903, "grad_norm": 1.6097618341445923, "learning_rate": 7.328542952774246e-06, "loss": 0.8134, "step": 10346 }, { "epoch": 0.36547470296570694, "grad_norm": 1.5718873739242554, "learning_rate": 7.328036744180772e-06, "loss": 0.7955, "step": 10347 }, { "epoch": 0.36551002476941485, "grad_norm": 1.6277068853378296, "learning_rate": 7.327530505118391e-06, "loss": 0.8365, "step": 10348 }, { "epoch": 0.36554534657312276, "grad_norm": 1.5374540090560913, "learning_rate": 7.32702423559373e-06, "loss": 0.8313, "step": 10349 }, { "epoch": 0.3655806683768307, "grad_norm": 1.6837451457977295, "learning_rate": 7.32651793561341e-06, "loss": 0.8239, "step": 10350 }, { "epoch": 0.3656159901805386, "grad_norm": 1.7419204711914062, "learning_rate": 7.326011605184062e-06, "loss": 0.837, "step": 10351 }, { "epoch": 0.3656513119842465, "grad_norm": 1.5467023849487305, "learning_rate": 7.3255052443123124e-06, "loss": 0.8072, "step": 10352 }, { "epoch": 0.3656866337879544, "grad_norm": 1.8335165977478027, "learning_rate": 7.324998853004784e-06, "loss": 0.7959, "step": 10353 }, { "epoch": 0.36572195559166226, "grad_norm": 1.7721903324127197, "learning_rate": 7.324492431268111e-06, "loss": 0.7806, "step": 10354 }, { "epoch": 0.3657572773953702, "grad_norm": 1.7261682748794556, "learning_rate": 7.323985979108916e-06, "loss": 0.8263, "step": 10355 }, { "epoch": 0.3657925991990781, "grad_norm": 1.7500265836715698, "learning_rate": 7.323479496533831e-06, "loss": 0.7965, "step": 10356 }, { "epoch": 0.365827921002786, "grad_norm": 1.7749948501586914, "learning_rate": 7.322972983549484e-06, "loss": 0.8104, "step": 10357 }, { "epoch": 0.3658632428064939, "grad_norm": 1.7069640159606934, "learning_rate": 7.322466440162502e-06, "loss": 0.7777, "step": 10358 }, { "epoch": 0.3658985646102018, "grad_norm": 1.6788527965545654, "learning_rate": 7.321959866379514e-06, "loss": 0.8241, "step": 10359 }, { "epoch": 0.3659338864139097, "grad_norm": 2.0372374057769775, "learning_rate": 7.321453262207153e-06, "loss": 0.8296, "step": 10360 }, { "epoch": 0.36596920821761764, "grad_norm": 1.576310396194458, "learning_rate": 7.320946627652049e-06, "loss": 0.8332, "step": 10361 }, { "epoch": 0.36600453002132555, "grad_norm": 1.920448660850525, "learning_rate": 7.320439962720831e-06, "loss": 0.8396, "step": 10362 }, { "epoch": 0.36603985182503346, "grad_norm": 1.671642541885376, "learning_rate": 7.319933267420131e-06, "loss": 0.7809, "step": 10363 }, { "epoch": 0.36607517362874137, "grad_norm": 1.7479851245880127, "learning_rate": 7.319426541756581e-06, "loss": 0.8189, "step": 10364 }, { "epoch": 0.3661104954324493, "grad_norm": 1.6488878726959229, "learning_rate": 7.318919785736812e-06, "loss": 0.7992, "step": 10365 }, { "epoch": 0.3661458172361572, "grad_norm": 1.745559811592102, "learning_rate": 7.318412999367458e-06, "loss": 0.8267, "step": 10366 }, { "epoch": 0.36618113903986504, "grad_norm": 1.6050118207931519, "learning_rate": 7.317906182655149e-06, "loss": 0.7934, "step": 10367 }, { "epoch": 0.36621646084357296, "grad_norm": 0.9872308373451233, "learning_rate": 7.31739933560652e-06, "loss": 0.6153, "step": 10368 }, { "epoch": 0.36625178264728087, "grad_norm": 1.5294867753982544, "learning_rate": 7.316892458228204e-06, "loss": 0.8039, "step": 10369 }, { "epoch": 0.3662871044509888, "grad_norm": 2.027147054672241, "learning_rate": 7.316385550526835e-06, "loss": 0.8135, "step": 10370 }, { "epoch": 0.3663224262546967, "grad_norm": 1.6113992929458618, "learning_rate": 7.315878612509046e-06, "loss": 0.8021, "step": 10371 }, { "epoch": 0.3663577480584046, "grad_norm": 1.6366769075393677, "learning_rate": 7.315371644181476e-06, "loss": 0.8399, "step": 10372 }, { "epoch": 0.3663930698621125, "grad_norm": 1.6171940565109253, "learning_rate": 7.314864645550756e-06, "loss": 0.8057, "step": 10373 }, { "epoch": 0.3664283916658204, "grad_norm": 1.6659687757492065, "learning_rate": 7.314357616623522e-06, "loss": 0.841, "step": 10374 }, { "epoch": 0.36646371346952833, "grad_norm": 1.6645900011062622, "learning_rate": 7.313850557406412e-06, "loss": 0.849, "step": 10375 }, { "epoch": 0.36649903527323624, "grad_norm": 1.6321496963500977, "learning_rate": 7.313343467906059e-06, "loss": 0.843, "step": 10376 }, { "epoch": 0.36653435707694415, "grad_norm": 1.8314937353134155, "learning_rate": 7.312836348129102e-06, "loss": 0.8306, "step": 10377 }, { "epoch": 0.36656967888065206, "grad_norm": 1.5501668453216553, "learning_rate": 7.312329198082179e-06, "loss": 0.7921, "step": 10378 }, { "epoch": 0.36660500068435997, "grad_norm": 1.5273973941802979, "learning_rate": 7.311822017771924e-06, "loss": 0.8031, "step": 10379 }, { "epoch": 0.3666403224880678, "grad_norm": 1.7718737125396729, "learning_rate": 7.311314807204979e-06, "loss": 0.8203, "step": 10380 }, { "epoch": 0.36667564429177574, "grad_norm": 1.8846615552902222, "learning_rate": 7.3108075663879784e-06, "loss": 0.8546, "step": 10381 }, { "epoch": 0.36671096609548365, "grad_norm": 1.6484514474868774, "learning_rate": 7.310300295327564e-06, "loss": 0.7854, "step": 10382 }, { "epoch": 0.36674628789919156, "grad_norm": 1.6649441719055176, "learning_rate": 7.309792994030373e-06, "loss": 0.8234, "step": 10383 }, { "epoch": 0.36678160970289947, "grad_norm": 1.7932723760604858, "learning_rate": 7.3092856625030464e-06, "loss": 0.8151, "step": 10384 }, { "epoch": 0.3668169315066074, "grad_norm": 1.633504033088684, "learning_rate": 7.308778300752223e-06, "loss": 0.805, "step": 10385 }, { "epoch": 0.3668522533103153, "grad_norm": 1.7108842134475708, "learning_rate": 7.308270908784543e-06, "loss": 0.8329, "step": 10386 }, { "epoch": 0.3668875751140232, "grad_norm": 1.6295883655548096, "learning_rate": 7.307763486606646e-06, "loss": 0.7951, "step": 10387 }, { "epoch": 0.3669228969177311, "grad_norm": 1.7459465265274048, "learning_rate": 7.307256034225176e-06, "loss": 0.8074, "step": 10388 }, { "epoch": 0.366958218721439, "grad_norm": 1.6974138021469116, "learning_rate": 7.306748551646772e-06, "loss": 0.7812, "step": 10389 }, { "epoch": 0.36699354052514693, "grad_norm": 1.7025327682495117, "learning_rate": 7.306241038878077e-06, "loss": 0.8459, "step": 10390 }, { "epoch": 0.36702886232885484, "grad_norm": 1.7130391597747803, "learning_rate": 7.305733495925732e-06, "loss": 0.8312, "step": 10391 }, { "epoch": 0.36706418413256275, "grad_norm": 1.7822927236557007, "learning_rate": 7.305225922796381e-06, "loss": 0.8141, "step": 10392 }, { "epoch": 0.3670995059362706, "grad_norm": 1.6579333543777466, "learning_rate": 7.304718319496666e-06, "loss": 0.8355, "step": 10393 }, { "epoch": 0.3671348277399785, "grad_norm": 1.7483534812927246, "learning_rate": 7.304210686033232e-06, "loss": 0.8506, "step": 10394 }, { "epoch": 0.36717014954368643, "grad_norm": 1.5592498779296875, "learning_rate": 7.30370302241272e-06, "loss": 0.8184, "step": 10395 }, { "epoch": 0.36720547134739434, "grad_norm": 1.6870481967926025, "learning_rate": 7.303195328641778e-06, "loss": 0.8464, "step": 10396 }, { "epoch": 0.36724079315110225, "grad_norm": 2.1180102825164795, "learning_rate": 7.302687604727046e-06, "loss": 0.8396, "step": 10397 }, { "epoch": 0.36727611495481016, "grad_norm": 2.574448347091675, "learning_rate": 7.302179850675173e-06, "loss": 0.8068, "step": 10398 }, { "epoch": 0.3673114367585181, "grad_norm": 1.5378912687301636, "learning_rate": 7.301672066492803e-06, "loss": 0.8119, "step": 10399 }, { "epoch": 0.367346758562226, "grad_norm": 1.6297383308410645, "learning_rate": 7.301164252186581e-06, "loss": 0.8222, "step": 10400 }, { "epoch": 0.3673820803659339, "grad_norm": 1.5146691799163818, "learning_rate": 7.300656407763155e-06, "loss": 0.8118, "step": 10401 }, { "epoch": 0.3674174021696418, "grad_norm": 1.4842114448547363, "learning_rate": 7.30014853322917e-06, "loss": 0.8139, "step": 10402 }, { "epoch": 0.3674527239733497, "grad_norm": 2.149096727371216, "learning_rate": 7.299640628591272e-06, "loss": 0.8297, "step": 10403 }, { "epoch": 0.3674880457770576, "grad_norm": 1.6432185173034668, "learning_rate": 7.2991326938561115e-06, "loss": 0.8115, "step": 10404 }, { "epoch": 0.36752336758076554, "grad_norm": 1.6752824783325195, "learning_rate": 7.298624729030332e-06, "loss": 0.8031, "step": 10405 }, { "epoch": 0.3675586893844734, "grad_norm": 1.5012791156768799, "learning_rate": 7.298116734120587e-06, "loss": 0.8342, "step": 10406 }, { "epoch": 0.3675940111881813, "grad_norm": 1.718366026878357, "learning_rate": 7.2976087091335216e-06, "loss": 0.8331, "step": 10407 }, { "epoch": 0.3676293329918892, "grad_norm": 1.6237798929214478, "learning_rate": 7.297100654075784e-06, "loss": 0.8421, "step": 10408 }, { "epoch": 0.3676646547955971, "grad_norm": 1.7348835468292236, "learning_rate": 7.296592568954027e-06, "loss": 0.7804, "step": 10409 }, { "epoch": 0.36769997659930503, "grad_norm": 1.5865436792373657, "learning_rate": 7.296084453774897e-06, "loss": 0.8385, "step": 10410 }, { "epoch": 0.36773529840301294, "grad_norm": 1.7122218608856201, "learning_rate": 7.295576308545043e-06, "loss": 0.7977, "step": 10411 }, { "epoch": 0.36777062020672086, "grad_norm": 1.6921401023864746, "learning_rate": 7.295068133271122e-06, "loss": 0.7887, "step": 10412 }, { "epoch": 0.36780594201042877, "grad_norm": 1.5558106899261475, "learning_rate": 7.294559927959777e-06, "loss": 0.8362, "step": 10413 }, { "epoch": 0.3678412638141367, "grad_norm": 1.5301412343978882, "learning_rate": 7.294051692617666e-06, "loss": 0.811, "step": 10414 }, { "epoch": 0.3678765856178446, "grad_norm": 1.4969868659973145, "learning_rate": 7.293543427251435e-06, "loss": 0.7927, "step": 10415 }, { "epoch": 0.3679119074215525, "grad_norm": 1.4698363542556763, "learning_rate": 7.2930351318677395e-06, "loss": 0.7877, "step": 10416 }, { "epoch": 0.3679472292252604, "grad_norm": 1.6505217552185059, "learning_rate": 7.292526806473232e-06, "loss": 0.812, "step": 10417 }, { "epoch": 0.3679825510289683, "grad_norm": 1.799107551574707, "learning_rate": 7.292018451074564e-06, "loss": 0.8446, "step": 10418 }, { "epoch": 0.3680178728326762, "grad_norm": 1.5502376556396484, "learning_rate": 7.29151006567839e-06, "loss": 0.8139, "step": 10419 }, { "epoch": 0.3680531946363841, "grad_norm": 1.6259394884109497, "learning_rate": 7.291001650291362e-06, "loss": 0.8188, "step": 10420 }, { "epoch": 0.368088516440092, "grad_norm": 1.7294849157333374, "learning_rate": 7.290493204920137e-06, "loss": 0.8085, "step": 10421 }, { "epoch": 0.3681238382437999, "grad_norm": 1.795823335647583, "learning_rate": 7.2899847295713666e-06, "loss": 0.8172, "step": 10422 }, { "epoch": 0.3681591600475078, "grad_norm": 1.4117590188980103, "learning_rate": 7.2894762242517056e-06, "loss": 0.7688, "step": 10423 }, { "epoch": 0.3681944818512157, "grad_norm": 1.608832836151123, "learning_rate": 7.288967688967811e-06, "loss": 0.8287, "step": 10424 }, { "epoch": 0.36822980365492364, "grad_norm": 1.6888680458068848, "learning_rate": 7.2884591237263365e-06, "loss": 0.8488, "step": 10425 }, { "epoch": 0.36826512545863155, "grad_norm": 1.5027029514312744, "learning_rate": 7.28795052853394e-06, "loss": 0.8031, "step": 10426 }, { "epoch": 0.36830044726233946, "grad_norm": 1.7744592428207397, "learning_rate": 7.287441903397278e-06, "loss": 0.8083, "step": 10427 }, { "epoch": 0.36833576906604737, "grad_norm": 1.6537851095199585, "learning_rate": 7.286933248323005e-06, "loss": 0.8138, "step": 10428 }, { "epoch": 0.3683710908697553, "grad_norm": 1.5255366563796997, "learning_rate": 7.28642456331778e-06, "loss": 0.8262, "step": 10429 }, { "epoch": 0.3684064126734632, "grad_norm": 1.812134861946106, "learning_rate": 7.285915848388261e-06, "loss": 0.8651, "step": 10430 }, { "epoch": 0.3684417344771711, "grad_norm": 1.9724794626235962, "learning_rate": 7.285407103541103e-06, "loss": 0.8185, "step": 10431 }, { "epoch": 0.36847705628087896, "grad_norm": 1.4821125268936157, "learning_rate": 7.284898328782968e-06, "loss": 0.8238, "step": 10432 }, { "epoch": 0.36851237808458687, "grad_norm": 1.5776543617248535, "learning_rate": 7.284389524120513e-06, "loss": 0.7851, "step": 10433 }, { "epoch": 0.3685476998882948, "grad_norm": 1.5151275396347046, "learning_rate": 7.2838806895603966e-06, "loss": 0.7901, "step": 10434 }, { "epoch": 0.3685830216920027, "grad_norm": 1.5635359287261963, "learning_rate": 7.28337182510928e-06, "loss": 0.828, "step": 10435 }, { "epoch": 0.3686183434957106, "grad_norm": 1.6114634275436401, "learning_rate": 7.2828629307738215e-06, "loss": 0.8223, "step": 10436 }, { "epoch": 0.3686536652994185, "grad_norm": 1.5393460988998413, "learning_rate": 7.282354006560681e-06, "loss": 0.8057, "step": 10437 }, { "epoch": 0.3686889871031264, "grad_norm": 1.5773628950119019, "learning_rate": 7.281845052476523e-06, "loss": 0.8207, "step": 10438 }, { "epoch": 0.36872430890683433, "grad_norm": 1.717494010925293, "learning_rate": 7.281336068528004e-06, "loss": 0.8213, "step": 10439 }, { "epoch": 0.36875963071054224, "grad_norm": 1.6876903772354126, "learning_rate": 7.280827054721789e-06, "loss": 0.7937, "step": 10440 }, { "epoch": 0.36879495251425015, "grad_norm": 1.692206621170044, "learning_rate": 7.280318011064538e-06, "loss": 0.8253, "step": 10441 }, { "epoch": 0.36883027431795806, "grad_norm": 1.5331406593322754, "learning_rate": 7.279808937562911e-06, "loss": 0.8096, "step": 10442 }, { "epoch": 0.368865596121666, "grad_norm": 1.7869244813919067, "learning_rate": 7.279299834223575e-06, "loss": 0.8178, "step": 10443 }, { "epoch": 0.3689009179253739, "grad_norm": 1.575807809829712, "learning_rate": 7.278790701053191e-06, "loss": 0.7975, "step": 10444 }, { "epoch": 0.36893623972908174, "grad_norm": 1.505236029624939, "learning_rate": 7.278281538058422e-06, "loss": 0.7945, "step": 10445 }, { "epoch": 0.36897156153278965, "grad_norm": 1.6647422313690186, "learning_rate": 7.277772345245935e-06, "loss": 0.8026, "step": 10446 }, { "epoch": 0.36900688333649756, "grad_norm": 1.9261244535446167, "learning_rate": 7.277263122622389e-06, "loss": 0.8361, "step": 10447 }, { "epoch": 0.36904220514020547, "grad_norm": 1.5058262348175049, "learning_rate": 7.276753870194452e-06, "loss": 0.8323, "step": 10448 }, { "epoch": 0.3690775269439134, "grad_norm": 1.629267930984497, "learning_rate": 7.2762445879687905e-06, "loss": 0.8165, "step": 10449 }, { "epoch": 0.3691128487476213, "grad_norm": 1.785757064819336, "learning_rate": 7.275735275952065e-06, "loss": 0.8206, "step": 10450 }, { "epoch": 0.3691481705513292, "grad_norm": 0.9842354655265808, "learning_rate": 7.275225934150944e-06, "loss": 0.5875, "step": 10451 }, { "epoch": 0.3691834923550371, "grad_norm": 1.7314261198043823, "learning_rate": 7.274716562572095e-06, "loss": 0.8018, "step": 10452 }, { "epoch": 0.369218814158745, "grad_norm": 1.9887877702713013, "learning_rate": 7.2742071612221825e-06, "loss": 0.8556, "step": 10453 }, { "epoch": 0.36925413596245293, "grad_norm": 1.808382511138916, "learning_rate": 7.273697730107875e-06, "loss": 0.8326, "step": 10454 }, { "epoch": 0.36928945776616084, "grad_norm": 1.7178902626037598, "learning_rate": 7.273188269235838e-06, "loss": 0.8152, "step": 10455 }, { "epoch": 0.36932477956986876, "grad_norm": 1.6631934642791748, "learning_rate": 7.272678778612741e-06, "loss": 0.8371, "step": 10456 }, { "epoch": 0.36936010137357667, "grad_norm": 1.6121249198913574, "learning_rate": 7.2721692582452504e-06, "loss": 0.8282, "step": 10457 }, { "epoch": 0.3693954231772845, "grad_norm": 1.5511187314987183, "learning_rate": 7.2716597081400376e-06, "loss": 0.8254, "step": 10458 }, { "epoch": 0.36943074498099243, "grad_norm": 1.6477575302124023, "learning_rate": 7.2711501283037675e-06, "loss": 0.8419, "step": 10459 }, { "epoch": 0.36946606678470034, "grad_norm": 1.8670133352279663, "learning_rate": 7.270640518743113e-06, "loss": 0.8225, "step": 10460 }, { "epoch": 0.36950138858840825, "grad_norm": 1.7411447763442993, "learning_rate": 7.270130879464741e-06, "loss": 0.8134, "step": 10461 }, { "epoch": 0.36953671039211616, "grad_norm": 1.6212364435195923, "learning_rate": 7.269621210475324e-06, "loss": 0.8104, "step": 10462 }, { "epoch": 0.3695720321958241, "grad_norm": 1.5512694120407104, "learning_rate": 7.269111511781529e-06, "loss": 0.8449, "step": 10463 }, { "epoch": 0.369607353999532, "grad_norm": 1.5756745338439941, "learning_rate": 7.268601783390031e-06, "loss": 0.8403, "step": 10464 }, { "epoch": 0.3696426758032399, "grad_norm": 1.665467619895935, "learning_rate": 7.268092025307499e-06, "loss": 0.7957, "step": 10465 }, { "epoch": 0.3696779976069478, "grad_norm": 1.7127642631530762, "learning_rate": 7.267582237540605e-06, "loss": 0.8143, "step": 10466 }, { "epoch": 0.3697133194106557, "grad_norm": 1.67938232421875, "learning_rate": 7.267072420096022e-06, "loss": 0.8326, "step": 10467 }, { "epoch": 0.3697486412143636, "grad_norm": 1.5708853006362915, "learning_rate": 7.266562572980419e-06, "loss": 0.8037, "step": 10468 }, { "epoch": 0.36978396301807154, "grad_norm": 1.73419988155365, "learning_rate": 7.266052696200472e-06, "loss": 0.8164, "step": 10469 }, { "epoch": 0.36981928482177945, "grad_norm": 1.913343906402588, "learning_rate": 7.265542789762854e-06, "loss": 0.8155, "step": 10470 }, { "epoch": 0.3698546066254873, "grad_norm": 1.5857117176055908, "learning_rate": 7.265032853674237e-06, "loss": 0.8114, "step": 10471 }, { "epoch": 0.3698899284291952, "grad_norm": 1.705588936805725, "learning_rate": 7.264522887941297e-06, "loss": 0.7823, "step": 10472 }, { "epoch": 0.3699252502329031, "grad_norm": 1.6360487937927246, "learning_rate": 7.264012892570705e-06, "loss": 0.8358, "step": 10473 }, { "epoch": 0.36996057203661104, "grad_norm": 1.7053344249725342, "learning_rate": 7.263502867569139e-06, "loss": 0.8346, "step": 10474 }, { "epoch": 0.36999589384031895, "grad_norm": 1.8358098268508911, "learning_rate": 7.262992812943274e-06, "loss": 0.8312, "step": 10475 }, { "epoch": 0.37003121564402686, "grad_norm": 1.5763605833053589, "learning_rate": 7.2624827286997826e-06, "loss": 0.8001, "step": 10476 }, { "epoch": 0.37006653744773477, "grad_norm": 1.5831444263458252, "learning_rate": 7.261972614845344e-06, "loss": 0.8277, "step": 10477 }, { "epoch": 0.3701018592514427, "grad_norm": 1.8068174123764038, "learning_rate": 7.261462471386633e-06, "loss": 0.8394, "step": 10478 }, { "epoch": 0.3701371810551506, "grad_norm": 1.7517213821411133, "learning_rate": 7.260952298330325e-06, "loss": 0.8206, "step": 10479 }, { "epoch": 0.3701725028588585, "grad_norm": 1.515804409980774, "learning_rate": 7.260442095683099e-06, "loss": 0.8, "step": 10480 }, { "epoch": 0.3702078246625664, "grad_norm": 1.7023309469223022, "learning_rate": 7.259931863451631e-06, "loss": 0.787, "step": 10481 }, { "epoch": 0.3702431464662743, "grad_norm": 1.7653803825378418, "learning_rate": 7.259421601642601e-06, "loss": 0.8085, "step": 10482 }, { "epoch": 0.37027846826998223, "grad_norm": 1.9793699979782104, "learning_rate": 7.258911310262686e-06, "loss": 0.8256, "step": 10483 }, { "epoch": 0.3703137900736901, "grad_norm": 1.8385728597640991, "learning_rate": 7.258400989318562e-06, "loss": 0.8543, "step": 10484 }, { "epoch": 0.370349111877398, "grad_norm": 1.7976444959640503, "learning_rate": 7.257890638816914e-06, "loss": 0.7766, "step": 10485 }, { "epoch": 0.3703844336811059, "grad_norm": 1.7750693559646606, "learning_rate": 7.2573802587644145e-06, "loss": 0.8331, "step": 10486 }, { "epoch": 0.3704197554848138, "grad_norm": 1.5952109098434448, "learning_rate": 7.256869849167749e-06, "loss": 0.7816, "step": 10487 }, { "epoch": 0.37045507728852173, "grad_norm": 1.8700029850006104, "learning_rate": 7.256359410033595e-06, "loss": 0.7925, "step": 10488 }, { "epoch": 0.37049039909222964, "grad_norm": 1.66354501247406, "learning_rate": 7.255848941368631e-06, "loss": 0.8063, "step": 10489 }, { "epoch": 0.37052572089593755, "grad_norm": 1.515618920326233, "learning_rate": 7.25533844317954e-06, "loss": 0.8024, "step": 10490 }, { "epoch": 0.37056104269964546, "grad_norm": 1.547316312789917, "learning_rate": 7.254827915473006e-06, "loss": 0.806, "step": 10491 }, { "epoch": 0.37059636450335337, "grad_norm": 1.6181234121322632, "learning_rate": 7.2543173582557055e-06, "loss": 0.855, "step": 10492 }, { "epoch": 0.3706316863070613, "grad_norm": 1.7131381034851074, "learning_rate": 7.253806771534325e-06, "loss": 0.818, "step": 10493 }, { "epoch": 0.3706670081107692, "grad_norm": 1.5670934915542603, "learning_rate": 7.253296155315545e-06, "loss": 0.8031, "step": 10494 }, { "epoch": 0.3707023299144771, "grad_norm": 1.5834141969680786, "learning_rate": 7.252785509606049e-06, "loss": 0.8105, "step": 10495 }, { "epoch": 0.370737651718185, "grad_norm": 1.46706223487854, "learning_rate": 7.252274834412519e-06, "loss": 0.8102, "step": 10496 }, { "epoch": 0.37077297352189287, "grad_norm": 1.6462138891220093, "learning_rate": 7.251764129741639e-06, "loss": 0.8022, "step": 10497 }, { "epoch": 0.3708082953256008, "grad_norm": 1.7071285247802734, "learning_rate": 7.251253395600093e-06, "loss": 0.8236, "step": 10498 }, { "epoch": 0.3708436171293087, "grad_norm": 1.440580129623413, "learning_rate": 7.250742631994566e-06, "loss": 0.8, "step": 10499 }, { "epoch": 0.3708789389330166, "grad_norm": 1.553468108177185, "learning_rate": 7.250231838931743e-06, "loss": 0.8019, "step": 10500 }, { "epoch": 0.3709142607367245, "grad_norm": 1.665613055229187, "learning_rate": 7.249721016418308e-06, "loss": 0.8305, "step": 10501 }, { "epoch": 0.3709495825404324, "grad_norm": 1.5191985368728638, "learning_rate": 7.249210164460948e-06, "loss": 0.7938, "step": 10502 }, { "epoch": 0.37098490434414033, "grad_norm": 1.0051342248916626, "learning_rate": 7.248699283066347e-06, "loss": 0.5613, "step": 10503 }, { "epoch": 0.37102022614784824, "grad_norm": 1.7873698472976685, "learning_rate": 7.248188372241195e-06, "loss": 0.831, "step": 10504 }, { "epoch": 0.37105554795155615, "grad_norm": 1.6300936937332153, "learning_rate": 7.247677431992174e-06, "loss": 0.7857, "step": 10505 }, { "epoch": 0.37109086975526406, "grad_norm": 1.7045016288757324, "learning_rate": 7.247166462325974e-06, "loss": 0.7899, "step": 10506 }, { "epoch": 0.371126191558972, "grad_norm": 1.8261137008666992, "learning_rate": 7.246655463249282e-06, "loss": 0.8235, "step": 10507 }, { "epoch": 0.3711615133626799, "grad_norm": 1.7787775993347168, "learning_rate": 7.246144434768785e-06, "loss": 0.8238, "step": 10508 }, { "epoch": 0.3711968351663878, "grad_norm": 1.6991279125213623, "learning_rate": 7.245633376891172e-06, "loss": 0.7718, "step": 10509 }, { "epoch": 0.37123215697009565, "grad_norm": 1.5184003114700317, "learning_rate": 7.245122289623131e-06, "loss": 0.8196, "step": 10510 }, { "epoch": 0.37126747877380356, "grad_norm": 1.5990688800811768, "learning_rate": 7.244611172971352e-06, "loss": 0.8328, "step": 10511 }, { "epoch": 0.37130280057751147, "grad_norm": 1.840464472770691, "learning_rate": 7.244100026942525e-06, "loss": 0.8772, "step": 10512 }, { "epoch": 0.3713381223812194, "grad_norm": 1.6090835332870483, "learning_rate": 7.243588851543338e-06, "loss": 0.7966, "step": 10513 }, { "epoch": 0.3713734441849273, "grad_norm": 1.7114126682281494, "learning_rate": 7.243077646780482e-06, "loss": 0.8176, "step": 10514 }, { "epoch": 0.3714087659886352, "grad_norm": 1.9119244813919067, "learning_rate": 7.242566412660646e-06, "loss": 0.8312, "step": 10515 }, { "epoch": 0.3714440877923431, "grad_norm": 1.6184325218200684, "learning_rate": 7.2420551491905235e-06, "loss": 0.8147, "step": 10516 }, { "epoch": 0.371479409596051, "grad_norm": 1.6876436471939087, "learning_rate": 7.241543856376805e-06, "loss": 0.8125, "step": 10517 }, { "epoch": 0.37151473139975894, "grad_norm": 1.5953468084335327, "learning_rate": 7.241032534226181e-06, "loss": 0.7783, "step": 10518 }, { "epoch": 0.37155005320346685, "grad_norm": 1.6395370960235596, "learning_rate": 7.2405211827453445e-06, "loss": 0.8461, "step": 10519 }, { "epoch": 0.37158537500717476, "grad_norm": 1.7400306463241577, "learning_rate": 7.240009801940989e-06, "loss": 0.8277, "step": 10520 }, { "epoch": 0.37162069681088267, "grad_norm": 1.782558798789978, "learning_rate": 7.239498391819805e-06, "loss": 0.8191, "step": 10521 }, { "epoch": 0.3716560186145906, "grad_norm": 1.660580039024353, "learning_rate": 7.238986952388488e-06, "loss": 0.8259, "step": 10522 }, { "epoch": 0.3716913404182985, "grad_norm": 1.6128944158554077, "learning_rate": 7.238475483653731e-06, "loss": 0.8319, "step": 10523 }, { "epoch": 0.37172666222200634, "grad_norm": 1.654780626296997, "learning_rate": 7.237963985622226e-06, "loss": 0.845, "step": 10524 }, { "epoch": 0.37176198402571425, "grad_norm": 1.6482802629470825, "learning_rate": 7.237452458300671e-06, "loss": 0.8131, "step": 10525 }, { "epoch": 0.37179730582942216, "grad_norm": 1.6524114608764648, "learning_rate": 7.236940901695757e-06, "loss": 0.8036, "step": 10526 }, { "epoch": 0.3718326276331301, "grad_norm": 1.7837694883346558, "learning_rate": 7.236429315814182e-06, "loss": 0.8137, "step": 10527 }, { "epoch": 0.371867949436838, "grad_norm": 1.6246832609176636, "learning_rate": 7.2359177006626405e-06, "loss": 0.8154, "step": 10528 }, { "epoch": 0.3719032712405459, "grad_norm": 1.5327589511871338, "learning_rate": 7.235406056247827e-06, "loss": 0.7911, "step": 10529 }, { "epoch": 0.3719385930442538, "grad_norm": 1.903735876083374, "learning_rate": 7.234894382576441e-06, "loss": 0.8301, "step": 10530 }, { "epoch": 0.3719739148479617, "grad_norm": 1.8502203226089478, "learning_rate": 7.2343826796551775e-06, "loss": 0.7927, "step": 10531 }, { "epoch": 0.37200923665166963, "grad_norm": 1.8404101133346558, "learning_rate": 7.233870947490733e-06, "loss": 0.7926, "step": 10532 }, { "epoch": 0.37204455845537754, "grad_norm": 1.5686399936676025, "learning_rate": 7.233359186089806e-06, "loss": 0.7761, "step": 10533 }, { "epoch": 0.37207988025908545, "grad_norm": 1.7538291215896606, "learning_rate": 7.232847395459092e-06, "loss": 0.8049, "step": 10534 }, { "epoch": 0.37211520206279336, "grad_norm": 1.5746846199035645, "learning_rate": 7.2323355756052925e-06, "loss": 0.8135, "step": 10535 }, { "epoch": 0.37215052386650127, "grad_norm": 0.9484636187553406, "learning_rate": 7.231823726535104e-06, "loss": 0.5984, "step": 10536 }, { "epoch": 0.3721858456702091, "grad_norm": 1.746809482574463, "learning_rate": 7.231311848255227e-06, "loss": 0.8067, "step": 10537 }, { "epoch": 0.37222116747391704, "grad_norm": 2.011068344116211, "learning_rate": 7.23079994077236e-06, "loss": 0.8377, "step": 10538 }, { "epoch": 0.37225648927762495, "grad_norm": 1.6989401578903198, "learning_rate": 7.2302880040932e-06, "loss": 0.8251, "step": 10539 }, { "epoch": 0.37229181108133286, "grad_norm": 1.6524403095245361, "learning_rate": 7.229776038224453e-06, "loss": 0.8245, "step": 10540 }, { "epoch": 0.37232713288504077, "grad_norm": 1.6510536670684814, "learning_rate": 7.229264043172816e-06, "loss": 0.8638, "step": 10541 }, { "epoch": 0.3723624546887487, "grad_norm": 1.873556137084961, "learning_rate": 7.228752018944989e-06, "loss": 0.7951, "step": 10542 }, { "epoch": 0.3723977764924566, "grad_norm": 1.5698790550231934, "learning_rate": 7.228239965547675e-06, "loss": 0.8253, "step": 10543 }, { "epoch": 0.3724330982961645, "grad_norm": 1.753058910369873, "learning_rate": 7.227727882987576e-06, "loss": 0.8398, "step": 10544 }, { "epoch": 0.3724684200998724, "grad_norm": 1.8044670820236206, "learning_rate": 7.227215771271392e-06, "loss": 0.86, "step": 10545 }, { "epoch": 0.3725037419035803, "grad_norm": 1.6285711526870728, "learning_rate": 7.226703630405827e-06, "loss": 0.821, "step": 10546 }, { "epoch": 0.37253906370728823, "grad_norm": 1.7297455072402954, "learning_rate": 7.226191460397583e-06, "loss": 0.8126, "step": 10547 }, { "epoch": 0.37257438551099614, "grad_norm": 1.648627519607544, "learning_rate": 7.225679261253365e-06, "loss": 0.8013, "step": 10548 }, { "epoch": 0.37260970731470405, "grad_norm": 1.8765712976455688, "learning_rate": 7.225167032979875e-06, "loss": 0.8226, "step": 10549 }, { "epoch": 0.3726450291184119, "grad_norm": 1.82735276222229, "learning_rate": 7.224654775583817e-06, "loss": 0.8298, "step": 10550 }, { "epoch": 0.3726803509221198, "grad_norm": 1.7301052808761597, "learning_rate": 7.224142489071895e-06, "loss": 0.8042, "step": 10551 }, { "epoch": 0.37271567272582773, "grad_norm": 1.4643865823745728, "learning_rate": 7.2236301734508145e-06, "loss": 0.7633, "step": 10552 }, { "epoch": 0.37275099452953564, "grad_norm": 1.662041425704956, "learning_rate": 7.22311782872728e-06, "loss": 0.7932, "step": 10553 }, { "epoch": 0.37278631633324355, "grad_norm": 1.7359200716018677, "learning_rate": 7.222605454907997e-06, "loss": 0.8617, "step": 10554 }, { "epoch": 0.37282163813695146, "grad_norm": 2.0124032497406006, "learning_rate": 7.222093051999671e-06, "loss": 0.8258, "step": 10555 }, { "epoch": 0.37285695994065937, "grad_norm": 1.585616111755371, "learning_rate": 7.221580620009011e-06, "loss": 0.8457, "step": 10556 }, { "epoch": 0.3728922817443673, "grad_norm": 1.7943603992462158, "learning_rate": 7.22106815894272e-06, "loss": 0.8149, "step": 10557 }, { "epoch": 0.3729276035480752, "grad_norm": 2.0967161655426025, "learning_rate": 7.220555668807507e-06, "loss": 0.8214, "step": 10558 }, { "epoch": 0.3729629253517831, "grad_norm": 1.5472722053527832, "learning_rate": 7.220043149610079e-06, "loss": 0.805, "step": 10559 }, { "epoch": 0.372998247155491, "grad_norm": 1.5914061069488525, "learning_rate": 7.219530601357143e-06, "loss": 0.837, "step": 10560 }, { "epoch": 0.3730335689591989, "grad_norm": 1.7295359373092651, "learning_rate": 7.219018024055408e-06, "loss": 0.8459, "step": 10561 }, { "epoch": 0.37306889076290684, "grad_norm": 1.7116349935531616, "learning_rate": 7.218505417711582e-06, "loss": 0.8129, "step": 10562 }, { "epoch": 0.3731042125666147, "grad_norm": 1.8300752639770508, "learning_rate": 7.217992782332374e-06, "loss": 0.7954, "step": 10563 }, { "epoch": 0.3731395343703226, "grad_norm": 1.5848668813705444, "learning_rate": 7.217480117924494e-06, "loss": 0.7886, "step": 10564 }, { "epoch": 0.3731748561740305, "grad_norm": 1.8385231494903564, "learning_rate": 7.21696742449465e-06, "loss": 0.809, "step": 10565 }, { "epoch": 0.3732101779777384, "grad_norm": 1.662631869316101, "learning_rate": 7.216454702049554e-06, "loss": 0.8113, "step": 10566 }, { "epoch": 0.37324549978144633, "grad_norm": 1.6059256792068481, "learning_rate": 7.215941950595915e-06, "loss": 0.787, "step": 10567 }, { "epoch": 0.37328082158515424, "grad_norm": 1.7821215391159058, "learning_rate": 7.215429170140444e-06, "loss": 0.8339, "step": 10568 }, { "epoch": 0.37331614338886215, "grad_norm": 1.9621926546096802, "learning_rate": 7.214916360689854e-06, "loss": 0.8168, "step": 10569 }, { "epoch": 0.37335146519257006, "grad_norm": 1.7946445941925049, "learning_rate": 7.2144035222508545e-06, "loss": 0.8234, "step": 10570 }, { "epoch": 0.373386786996278, "grad_norm": 1.8684263229370117, "learning_rate": 7.213890654830158e-06, "loss": 0.7867, "step": 10571 }, { "epoch": 0.3734221087999859, "grad_norm": 1.610472321510315, "learning_rate": 7.213377758434476e-06, "loss": 0.8129, "step": 10572 }, { "epoch": 0.3734574306036938, "grad_norm": 1.5072686672210693, "learning_rate": 7.212864833070521e-06, "loss": 0.7896, "step": 10573 }, { "epoch": 0.3734927524074017, "grad_norm": 1.6859321594238281, "learning_rate": 7.212351878745009e-06, "loss": 0.8239, "step": 10574 }, { "epoch": 0.3735280742111096, "grad_norm": 1.7113264799118042, "learning_rate": 7.211838895464651e-06, "loss": 0.8104, "step": 10575 }, { "epoch": 0.3735633960148175, "grad_norm": 1.7389360666275024, "learning_rate": 7.211325883236161e-06, "loss": 0.8093, "step": 10576 }, { "epoch": 0.3735987178185254, "grad_norm": 1.4834129810333252, "learning_rate": 7.2108128420662535e-06, "loss": 0.8246, "step": 10577 }, { "epoch": 0.3736340396222333, "grad_norm": 1.6527302265167236, "learning_rate": 7.2102997719616445e-06, "loss": 0.7894, "step": 10578 }, { "epoch": 0.3736693614259412, "grad_norm": 1.7123113870620728, "learning_rate": 7.209786672929045e-06, "loss": 0.8133, "step": 10579 }, { "epoch": 0.3737046832296491, "grad_norm": 1.9875138998031616, "learning_rate": 7.209273544975175e-06, "loss": 0.7797, "step": 10580 }, { "epoch": 0.373740005033357, "grad_norm": 1.8282227516174316, "learning_rate": 7.208760388106747e-06, "loss": 0.8217, "step": 10581 }, { "epoch": 0.37377532683706494, "grad_norm": 1.6014811992645264, "learning_rate": 7.208247202330478e-06, "loss": 0.7614, "step": 10582 }, { "epoch": 0.37381064864077285, "grad_norm": 1.5941599607467651, "learning_rate": 7.207733987653087e-06, "loss": 0.8129, "step": 10583 }, { "epoch": 0.37384597044448076, "grad_norm": 1.613066554069519, "learning_rate": 7.207220744081286e-06, "loss": 0.8161, "step": 10584 }, { "epoch": 0.37388129224818867, "grad_norm": 1.7792168855667114, "learning_rate": 7.206707471621796e-06, "loss": 0.8096, "step": 10585 }, { "epoch": 0.3739166140518966, "grad_norm": 1.8663543462753296, "learning_rate": 7.206194170281333e-06, "loss": 0.8435, "step": 10586 }, { "epoch": 0.3739519358556045, "grad_norm": 1.676202416419983, "learning_rate": 7.205680840066617e-06, "loss": 0.7656, "step": 10587 }, { "epoch": 0.3739872576593124, "grad_norm": 1.7472132444381714, "learning_rate": 7.205167480984363e-06, "loss": 0.832, "step": 10588 }, { "epoch": 0.37402257946302025, "grad_norm": 1.7287989854812622, "learning_rate": 7.204654093041291e-06, "loss": 0.8292, "step": 10589 }, { "epoch": 0.37405790126672817, "grad_norm": 1.7517625093460083, "learning_rate": 7.2041406762441225e-06, "loss": 0.8157, "step": 10590 }, { "epoch": 0.3740932230704361, "grad_norm": 1.6694985628128052, "learning_rate": 7.203627230599575e-06, "loss": 0.8087, "step": 10591 }, { "epoch": 0.374128544874144, "grad_norm": 1.5834150314331055, "learning_rate": 7.2031137561143664e-06, "loss": 0.8114, "step": 10592 }, { "epoch": 0.3741638666778519, "grad_norm": 1.6713799238204956, "learning_rate": 7.202600252795221e-06, "loss": 0.8335, "step": 10593 }, { "epoch": 0.3741991884815598, "grad_norm": 1.5866432189941406, "learning_rate": 7.202086720648858e-06, "loss": 0.8362, "step": 10594 }, { "epoch": 0.3742345102852677, "grad_norm": 1.6149827241897583, "learning_rate": 7.201573159681996e-06, "loss": 0.8083, "step": 10595 }, { "epoch": 0.37426983208897563, "grad_norm": 1.6975361108779907, "learning_rate": 7.201059569901359e-06, "loss": 0.8146, "step": 10596 }, { "epoch": 0.37430515389268354, "grad_norm": 1.6330853700637817, "learning_rate": 7.2005459513136684e-06, "loss": 0.808, "step": 10597 }, { "epoch": 0.37434047569639145, "grad_norm": 4.457854270935059, "learning_rate": 7.200032303925646e-06, "loss": 0.7892, "step": 10598 }, { "epoch": 0.37437579750009936, "grad_norm": 1.7144317626953125, "learning_rate": 7.199518627744015e-06, "loss": 0.8298, "step": 10599 }, { "epoch": 0.37441111930380727, "grad_norm": 1.6608880758285522, "learning_rate": 7.199004922775497e-06, "loss": 0.7773, "step": 10600 }, { "epoch": 0.3744464411075152, "grad_norm": 1.6798498630523682, "learning_rate": 7.198491189026816e-06, "loss": 0.8641, "step": 10601 }, { "epoch": 0.37448176291122304, "grad_norm": 1.5605355501174927, "learning_rate": 7.197977426504696e-06, "loss": 0.8156, "step": 10602 }, { "epoch": 0.37451708471493095, "grad_norm": 1.713523268699646, "learning_rate": 7.19746363521586e-06, "loss": 0.7878, "step": 10603 }, { "epoch": 0.37455240651863886, "grad_norm": 2.0185470581054688, "learning_rate": 7.196949815167034e-06, "loss": 0.8188, "step": 10604 }, { "epoch": 0.37458772832234677, "grad_norm": 1.5612789392471313, "learning_rate": 7.1964359663649395e-06, "loss": 0.8073, "step": 10605 }, { "epoch": 0.3746230501260547, "grad_norm": 1.6913572549819946, "learning_rate": 7.195922088816307e-06, "loss": 0.8283, "step": 10606 }, { "epoch": 0.3746583719297626, "grad_norm": 1.6458606719970703, "learning_rate": 7.1954081825278565e-06, "loss": 0.8368, "step": 10607 }, { "epoch": 0.3746936937334705, "grad_norm": 1.9311485290527344, "learning_rate": 7.194894247506318e-06, "loss": 0.8238, "step": 10608 }, { "epoch": 0.3747290155371784, "grad_norm": 1.0300500392913818, "learning_rate": 7.194380283758415e-06, "loss": 0.6035, "step": 10609 }, { "epoch": 0.3747643373408863, "grad_norm": 1.5861358642578125, "learning_rate": 7.193866291290877e-06, "loss": 0.7941, "step": 10610 }, { "epoch": 0.37479965914459423, "grad_norm": 2.2490711212158203, "learning_rate": 7.193352270110427e-06, "loss": 0.8412, "step": 10611 }, { "epoch": 0.37483498094830214, "grad_norm": 1.7113922834396362, "learning_rate": 7.192838220223796e-06, "loss": 0.8099, "step": 10612 }, { "epoch": 0.37487030275201005, "grad_norm": 1.7411479949951172, "learning_rate": 7.192324141637711e-06, "loss": 0.8485, "step": 10613 }, { "epoch": 0.37490562455571796, "grad_norm": 1.755185604095459, "learning_rate": 7.191810034358899e-06, "loss": 0.7925, "step": 10614 }, { "epoch": 0.3749409463594258, "grad_norm": 1.7621492147445679, "learning_rate": 7.191295898394091e-06, "loss": 0.8149, "step": 10615 }, { "epoch": 0.37497626816313373, "grad_norm": 1.6903403997421265, "learning_rate": 7.190781733750013e-06, "loss": 0.7804, "step": 10616 }, { "epoch": 0.37501158996684164, "grad_norm": 1.7937380075454712, "learning_rate": 7.190267540433395e-06, "loss": 0.8663, "step": 10617 }, { "epoch": 0.37504691177054955, "grad_norm": 1.5371969938278198, "learning_rate": 7.189753318450967e-06, "loss": 0.832, "step": 10618 }, { "epoch": 0.37508223357425746, "grad_norm": 1.648816704750061, "learning_rate": 7.18923906780946e-06, "loss": 0.7803, "step": 10619 }, { "epoch": 0.3751175553779654, "grad_norm": 1.9878816604614258, "learning_rate": 7.188724788515604e-06, "loss": 0.8378, "step": 10620 }, { "epoch": 0.3751528771816733, "grad_norm": 1.678375244140625, "learning_rate": 7.188210480576128e-06, "loss": 0.8543, "step": 10621 }, { "epoch": 0.3751881989853812, "grad_norm": 1.5846130847930908, "learning_rate": 7.187696143997766e-06, "loss": 0.8275, "step": 10622 }, { "epoch": 0.3752235207890891, "grad_norm": 1.6299606561660767, "learning_rate": 7.187181778787248e-06, "loss": 0.7983, "step": 10623 }, { "epoch": 0.375258842592797, "grad_norm": 1.5849609375, "learning_rate": 7.186667384951306e-06, "loss": 0.8125, "step": 10624 }, { "epoch": 0.3752941643965049, "grad_norm": 1.6183487176895142, "learning_rate": 7.186152962496671e-06, "loss": 0.8369, "step": 10625 }, { "epoch": 0.37532948620021284, "grad_norm": 2.0838828086853027, "learning_rate": 7.1856385114300775e-06, "loss": 0.7975, "step": 10626 }, { "epoch": 0.37536480800392075, "grad_norm": 1.6711108684539795, "learning_rate": 7.1851240317582585e-06, "loss": 0.8258, "step": 10627 }, { "epoch": 0.3754001298076286, "grad_norm": 1.6426727771759033, "learning_rate": 7.184609523487947e-06, "loss": 0.8337, "step": 10628 }, { "epoch": 0.3754354516113365, "grad_norm": 1.5643037557601929, "learning_rate": 7.184094986625876e-06, "loss": 0.8104, "step": 10629 }, { "epoch": 0.3754707734150444, "grad_norm": 1.7562692165374756, "learning_rate": 7.183580421178781e-06, "loss": 0.8071, "step": 10630 }, { "epoch": 0.37550609521875233, "grad_norm": 1.6668471097946167, "learning_rate": 7.183065827153395e-06, "loss": 0.8445, "step": 10631 }, { "epoch": 0.37554141702246024, "grad_norm": 1.5823380947113037, "learning_rate": 7.182551204556455e-06, "loss": 0.7895, "step": 10632 }, { "epoch": 0.37557673882616815, "grad_norm": 1.4491554498672485, "learning_rate": 7.1820365533946944e-06, "loss": 0.8051, "step": 10633 }, { "epoch": 0.37561206062987607, "grad_norm": 1.7090678215026855, "learning_rate": 7.1815218736748504e-06, "loss": 0.7936, "step": 10634 }, { "epoch": 0.375647382433584, "grad_norm": 1.5042495727539062, "learning_rate": 7.181007165403659e-06, "loss": 0.8032, "step": 10635 }, { "epoch": 0.3756827042372919, "grad_norm": 1.8006552457809448, "learning_rate": 7.180492428587853e-06, "loss": 0.8689, "step": 10636 }, { "epoch": 0.3757180260409998, "grad_norm": 1.6350300312042236, "learning_rate": 7.179977663234173e-06, "loss": 0.813, "step": 10637 }, { "epoch": 0.3757533478447077, "grad_norm": 1.8705778121948242, "learning_rate": 7.1794628693493565e-06, "loss": 0.7977, "step": 10638 }, { "epoch": 0.3757886696484156, "grad_norm": 1.6271096467971802, "learning_rate": 7.178948046940137e-06, "loss": 0.8051, "step": 10639 }, { "epoch": 0.37582399145212353, "grad_norm": 1.5103472471237183, "learning_rate": 7.178433196013258e-06, "loss": 0.8137, "step": 10640 }, { "epoch": 0.3758593132558314, "grad_norm": 1.597185492515564, "learning_rate": 7.177918316575454e-06, "loss": 0.8019, "step": 10641 }, { "epoch": 0.3758946350595393, "grad_norm": 1.747045636177063, "learning_rate": 7.1774034086334635e-06, "loss": 0.8178, "step": 10642 }, { "epoch": 0.3759299568632472, "grad_norm": 9.246991157531738, "learning_rate": 7.176888472194028e-06, "loss": 0.8435, "step": 10643 }, { "epoch": 0.3759652786669551, "grad_norm": 1.7916290760040283, "learning_rate": 7.176373507263884e-06, "loss": 0.8271, "step": 10644 }, { "epoch": 0.376000600470663, "grad_norm": 1.7285021543502808, "learning_rate": 7.175858513849774e-06, "loss": 0.8029, "step": 10645 }, { "epoch": 0.37603592227437094, "grad_norm": 1.8433551788330078, "learning_rate": 7.175343491958436e-06, "loss": 0.8333, "step": 10646 }, { "epoch": 0.37607124407807885, "grad_norm": 1.5088236331939697, "learning_rate": 7.174828441596611e-06, "loss": 0.8193, "step": 10647 }, { "epoch": 0.37610656588178676, "grad_norm": 1.5009205341339111, "learning_rate": 7.174313362771039e-06, "loss": 0.7727, "step": 10648 }, { "epoch": 0.37614188768549467, "grad_norm": 1.5009220838546753, "learning_rate": 7.173798255488464e-06, "loss": 0.8009, "step": 10649 }, { "epoch": 0.3761772094892026, "grad_norm": 1.7874759435653687, "learning_rate": 7.173283119755626e-06, "loss": 0.8115, "step": 10650 }, { "epoch": 0.3762125312929105, "grad_norm": 1.518931269645691, "learning_rate": 7.172767955579267e-06, "loss": 0.7795, "step": 10651 }, { "epoch": 0.3762478530966184, "grad_norm": 1.7623491287231445, "learning_rate": 7.1722527629661295e-06, "loss": 0.8042, "step": 10652 }, { "epoch": 0.3762831749003263, "grad_norm": 1.4530630111694336, "learning_rate": 7.171737541922956e-06, "loss": 0.8139, "step": 10653 }, { "epoch": 0.37631849670403417, "grad_norm": 1.6347583532333374, "learning_rate": 7.1712222924564915e-06, "loss": 0.8072, "step": 10654 }, { "epoch": 0.3763538185077421, "grad_norm": 1.5855001211166382, "learning_rate": 7.170707014573474e-06, "loss": 0.8162, "step": 10655 }, { "epoch": 0.37638914031145, "grad_norm": 1.6329916715621948, "learning_rate": 7.170191708280654e-06, "loss": 0.8081, "step": 10656 }, { "epoch": 0.3764244621151579, "grad_norm": 1.6363948583602905, "learning_rate": 7.169676373584773e-06, "loss": 0.7983, "step": 10657 }, { "epoch": 0.3764597839188658, "grad_norm": 1.6541528701782227, "learning_rate": 7.169161010492574e-06, "loss": 0.7754, "step": 10658 }, { "epoch": 0.3764951057225737, "grad_norm": 1.4939546585083008, "learning_rate": 7.168645619010805e-06, "loss": 0.8402, "step": 10659 }, { "epoch": 0.37653042752628163, "grad_norm": 1.5823734998703003, "learning_rate": 7.168130199146207e-06, "loss": 0.7781, "step": 10660 }, { "epoch": 0.37656574932998954, "grad_norm": 1.470070719718933, "learning_rate": 7.167614750905532e-06, "loss": 0.792, "step": 10661 }, { "epoch": 0.37660107113369745, "grad_norm": 1.4876958131790161, "learning_rate": 7.167099274295521e-06, "loss": 0.8087, "step": 10662 }, { "epoch": 0.37663639293740536, "grad_norm": 1.6555454730987549, "learning_rate": 7.166583769322921e-06, "loss": 0.8223, "step": 10663 }, { "epoch": 0.3766717147411133, "grad_norm": 1.8890713453292847, "learning_rate": 7.166068235994482e-06, "loss": 0.8251, "step": 10664 }, { "epoch": 0.3767070365448212, "grad_norm": 1.7820028066635132, "learning_rate": 7.165552674316947e-06, "loss": 0.8317, "step": 10665 }, { "epoch": 0.3767423583485291, "grad_norm": 1.5534623861312866, "learning_rate": 7.1650370842970674e-06, "loss": 0.8044, "step": 10666 }, { "epoch": 0.37677768015223695, "grad_norm": 2.0247747898101807, "learning_rate": 7.16452146594159e-06, "loss": 0.8383, "step": 10667 }, { "epoch": 0.37681300195594486, "grad_norm": 1.7618217468261719, "learning_rate": 7.1640058192572595e-06, "loss": 0.8197, "step": 10668 }, { "epoch": 0.37684832375965277, "grad_norm": 1.825942873954773, "learning_rate": 7.16349014425083e-06, "loss": 0.8271, "step": 10669 }, { "epoch": 0.3768836455633607, "grad_norm": 1.510048270225525, "learning_rate": 7.162974440929048e-06, "loss": 0.8148, "step": 10670 }, { "epoch": 0.3769189673670686, "grad_norm": 1.6204644441604614, "learning_rate": 7.1624587092986635e-06, "loss": 0.791, "step": 10671 }, { "epoch": 0.3769542891707765, "grad_norm": 2.1345951557159424, "learning_rate": 7.161942949366425e-06, "loss": 0.8202, "step": 10672 }, { "epoch": 0.3769896109744844, "grad_norm": 1.591017484664917, "learning_rate": 7.161427161139083e-06, "loss": 0.8036, "step": 10673 }, { "epoch": 0.3770249327781923, "grad_norm": 1.5168148279190063, "learning_rate": 7.160911344623389e-06, "loss": 0.8251, "step": 10674 }, { "epoch": 0.37706025458190023, "grad_norm": 1.7062389850616455, "learning_rate": 7.160395499826095e-06, "loss": 0.8174, "step": 10675 }, { "epoch": 0.37709557638560814, "grad_norm": 1.616835594177246, "learning_rate": 7.1598796267539485e-06, "loss": 0.7971, "step": 10676 }, { "epoch": 0.37713089818931606, "grad_norm": 1.843841791152954, "learning_rate": 7.159363725413705e-06, "loss": 0.8108, "step": 10677 }, { "epoch": 0.37716621999302397, "grad_norm": 1.4563560485839844, "learning_rate": 7.158847795812115e-06, "loss": 0.8129, "step": 10678 }, { "epoch": 0.3772015417967319, "grad_norm": 1.607940435409546, "learning_rate": 7.15833183795593e-06, "loss": 0.7922, "step": 10679 }, { "epoch": 0.37723686360043973, "grad_norm": 1.8515316247940063, "learning_rate": 7.1578158518519046e-06, "loss": 0.8393, "step": 10680 }, { "epoch": 0.37727218540414764, "grad_norm": 3.7193100452423096, "learning_rate": 7.15729983750679e-06, "loss": 0.8285, "step": 10681 }, { "epoch": 0.37730750720785555, "grad_norm": 1.7009061574935913, "learning_rate": 7.156783794927342e-06, "loss": 0.798, "step": 10682 }, { "epoch": 0.37734282901156346, "grad_norm": 1.5829896926879883, "learning_rate": 7.1562677241203115e-06, "loss": 0.8082, "step": 10683 }, { "epoch": 0.3773781508152714, "grad_norm": 1.99149489402771, "learning_rate": 7.155751625092455e-06, "loss": 0.804, "step": 10684 }, { "epoch": 0.3774134726189793, "grad_norm": 1.6133482456207275, "learning_rate": 7.155235497850527e-06, "loss": 0.8474, "step": 10685 }, { "epoch": 0.3774487944226872, "grad_norm": 1.5599400997161865, "learning_rate": 7.154719342401281e-06, "loss": 0.8171, "step": 10686 }, { "epoch": 0.3774841162263951, "grad_norm": 0.9347071647644043, "learning_rate": 7.154203158751474e-06, "loss": 0.6202, "step": 10687 }, { "epoch": 0.377519438030103, "grad_norm": 1.718946099281311, "learning_rate": 7.153686946907861e-06, "loss": 0.807, "step": 10688 }, { "epoch": 0.3775547598338109, "grad_norm": 1.659392237663269, "learning_rate": 7.153170706877198e-06, "loss": 0.811, "step": 10689 }, { "epoch": 0.37759008163751884, "grad_norm": 1.6957783699035645, "learning_rate": 7.1526544386662425e-06, "loss": 0.8074, "step": 10690 }, { "epoch": 0.37762540344122675, "grad_norm": 1.5406219959259033, "learning_rate": 7.152138142281748e-06, "loss": 0.8079, "step": 10691 }, { "epoch": 0.37766072524493466, "grad_norm": 1.5112497806549072, "learning_rate": 7.151621817730475e-06, "loss": 0.8038, "step": 10692 }, { "epoch": 0.3776960470486425, "grad_norm": 1.610853672027588, "learning_rate": 7.151105465019182e-06, "loss": 0.8225, "step": 10693 }, { "epoch": 0.3777313688523504, "grad_norm": 1.6949479579925537, "learning_rate": 7.150589084154622e-06, "loss": 0.8671, "step": 10694 }, { "epoch": 0.37776669065605833, "grad_norm": 1.5121580362319946, "learning_rate": 7.150072675143558e-06, "loss": 0.7985, "step": 10695 }, { "epoch": 0.37780201245976625, "grad_norm": 1.8364163637161255, "learning_rate": 7.149556237992748e-06, "loss": 0.8355, "step": 10696 }, { "epoch": 0.37783733426347416, "grad_norm": 1.731010913848877, "learning_rate": 7.1490397727089474e-06, "loss": 0.8273, "step": 10697 }, { "epoch": 0.37787265606718207, "grad_norm": 0.9539805054664612, "learning_rate": 7.148523279298921e-06, "loss": 0.5916, "step": 10698 }, { "epoch": 0.37790797787089, "grad_norm": 1.9919459819793701, "learning_rate": 7.148006757769423e-06, "loss": 0.8215, "step": 10699 }, { "epoch": 0.3779432996745979, "grad_norm": 1.6915130615234375, "learning_rate": 7.147490208127219e-06, "loss": 0.8041, "step": 10700 }, { "epoch": 0.3779786214783058, "grad_norm": 2.0063095092773438, "learning_rate": 7.146973630379066e-06, "loss": 0.8282, "step": 10701 }, { "epoch": 0.3780139432820137, "grad_norm": 1.947061538696289, "learning_rate": 7.146457024531724e-06, "loss": 0.8441, "step": 10702 }, { "epoch": 0.3780492650857216, "grad_norm": 1.9273550510406494, "learning_rate": 7.145940390591958e-06, "loss": 0.7969, "step": 10703 }, { "epoch": 0.37808458688942953, "grad_norm": 1.7091747522354126, "learning_rate": 7.145423728566528e-06, "loss": 0.8381, "step": 10704 }, { "epoch": 0.37811990869313744, "grad_norm": 1.72211754322052, "learning_rate": 7.144907038462193e-06, "loss": 0.8333, "step": 10705 }, { "epoch": 0.3781552304968453, "grad_norm": 1.7497738599777222, "learning_rate": 7.144390320285719e-06, "loss": 0.7798, "step": 10706 }, { "epoch": 0.3781905523005532, "grad_norm": 1.6457382440567017, "learning_rate": 7.143873574043869e-06, "loss": 0.8244, "step": 10707 }, { "epoch": 0.3782258741042611, "grad_norm": 1.688963770866394, "learning_rate": 7.143356799743404e-06, "loss": 0.8699, "step": 10708 }, { "epoch": 0.37826119590796903, "grad_norm": 1.7348005771636963, "learning_rate": 7.1428399973910876e-06, "loss": 0.8255, "step": 10709 }, { "epoch": 0.37829651771167694, "grad_norm": 1.7127288579940796, "learning_rate": 7.142323166993683e-06, "loss": 0.8214, "step": 10710 }, { "epoch": 0.37833183951538485, "grad_norm": 1.6760618686676025, "learning_rate": 7.141806308557957e-06, "loss": 0.8465, "step": 10711 }, { "epoch": 0.37836716131909276, "grad_norm": 1.7550867795944214, "learning_rate": 7.141289422090674e-06, "loss": 0.7995, "step": 10712 }, { "epoch": 0.37840248312280067, "grad_norm": 1.547350525856018, "learning_rate": 7.140772507598595e-06, "loss": 0.8082, "step": 10713 }, { "epoch": 0.3784378049265086, "grad_norm": 1.7011297941207886, "learning_rate": 7.140255565088488e-06, "loss": 0.7999, "step": 10714 }, { "epoch": 0.3784731267302165, "grad_norm": 1.7159360647201538, "learning_rate": 7.13973859456712e-06, "loss": 0.8064, "step": 10715 }, { "epoch": 0.3785084485339244, "grad_norm": 1.582409381866455, "learning_rate": 7.139221596041253e-06, "loss": 0.8311, "step": 10716 }, { "epoch": 0.3785437703376323, "grad_norm": 1.4847787618637085, "learning_rate": 7.138704569517661e-06, "loss": 0.8268, "step": 10717 }, { "epoch": 0.3785790921413402, "grad_norm": 1.5687103271484375, "learning_rate": 7.138187515003101e-06, "loss": 0.8149, "step": 10718 }, { "epoch": 0.3786144139450481, "grad_norm": 1.676286220550537, "learning_rate": 7.137670432504345e-06, "loss": 0.7969, "step": 10719 }, { "epoch": 0.378649735748756, "grad_norm": 1.583964228630066, "learning_rate": 7.137153322028162e-06, "loss": 0.8122, "step": 10720 }, { "epoch": 0.3786850575524639, "grad_norm": 1.6994969844818115, "learning_rate": 7.136636183581317e-06, "loss": 0.8178, "step": 10721 }, { "epoch": 0.3787203793561718, "grad_norm": 1.6037510633468628, "learning_rate": 7.13611901717058e-06, "loss": 0.8137, "step": 10722 }, { "epoch": 0.3787557011598797, "grad_norm": 1.5912936925888062, "learning_rate": 7.135601822802717e-06, "loss": 0.8039, "step": 10723 }, { "epoch": 0.37879102296358763, "grad_norm": 1.6253407001495361, "learning_rate": 7.135084600484501e-06, "loss": 0.8279, "step": 10724 }, { "epoch": 0.37882634476729554, "grad_norm": 2.5029096603393555, "learning_rate": 7.134567350222698e-06, "loss": 0.8169, "step": 10725 }, { "epoch": 0.37886166657100345, "grad_norm": 1.7533204555511475, "learning_rate": 7.134050072024079e-06, "loss": 0.8815, "step": 10726 }, { "epoch": 0.37889698837471136, "grad_norm": 1.880737066268921, "learning_rate": 7.133532765895414e-06, "loss": 0.8393, "step": 10727 }, { "epoch": 0.3789323101784193, "grad_norm": 1.7373895645141602, "learning_rate": 7.133015431843473e-06, "loss": 0.8235, "step": 10728 }, { "epoch": 0.3789676319821272, "grad_norm": 1.570032000541687, "learning_rate": 7.132498069875028e-06, "loss": 0.7573, "step": 10729 }, { "epoch": 0.3790029537858351, "grad_norm": 1.658747673034668, "learning_rate": 7.131980679996848e-06, "loss": 0.8323, "step": 10730 }, { "epoch": 0.379038275589543, "grad_norm": 1.6107456684112549, "learning_rate": 7.131463262215706e-06, "loss": 0.8031, "step": 10731 }, { "epoch": 0.37907359739325086, "grad_norm": 1.5460256338119507, "learning_rate": 7.130945816538372e-06, "loss": 0.8299, "step": 10732 }, { "epoch": 0.37910891919695877, "grad_norm": 1.6874924898147583, "learning_rate": 7.130428342971622e-06, "loss": 0.8298, "step": 10733 }, { "epoch": 0.3791442410006667, "grad_norm": 1.7015947103500366, "learning_rate": 7.129910841522225e-06, "loss": 0.8074, "step": 10734 }, { "epoch": 0.3791795628043746, "grad_norm": 1.5753852128982544, "learning_rate": 7.129393312196957e-06, "loss": 0.8147, "step": 10735 }, { "epoch": 0.3792148846080825, "grad_norm": 2.2045812606811523, "learning_rate": 7.1288757550025876e-06, "loss": 0.8005, "step": 10736 }, { "epoch": 0.3792502064117904, "grad_norm": 1.7163116931915283, "learning_rate": 7.128358169945892e-06, "loss": 0.7799, "step": 10737 }, { "epoch": 0.3792855282154983, "grad_norm": 1.8436026573181152, "learning_rate": 7.127840557033647e-06, "loss": 0.8061, "step": 10738 }, { "epoch": 0.37932085001920623, "grad_norm": 1.6674857139587402, "learning_rate": 7.1273229162726235e-06, "loss": 0.7838, "step": 10739 }, { "epoch": 0.37935617182291415, "grad_norm": 1.6793278455734253, "learning_rate": 7.126805247669598e-06, "loss": 0.8079, "step": 10740 }, { "epoch": 0.37939149362662206, "grad_norm": 1.6925642490386963, "learning_rate": 7.126287551231346e-06, "loss": 0.841, "step": 10741 }, { "epoch": 0.37942681543032997, "grad_norm": 1.678375244140625, "learning_rate": 7.12576982696464e-06, "loss": 0.8615, "step": 10742 }, { "epoch": 0.3794621372340379, "grad_norm": 1.7764579057693481, "learning_rate": 7.12525207487626e-06, "loss": 0.8024, "step": 10743 }, { "epoch": 0.3794974590377458, "grad_norm": 1.5295186042785645, "learning_rate": 7.124734294972979e-06, "loss": 0.8007, "step": 10744 }, { "epoch": 0.37953278084145364, "grad_norm": 3.4498252868652344, "learning_rate": 7.124216487261577e-06, "loss": 0.8233, "step": 10745 }, { "epoch": 0.37956810264516155, "grad_norm": 1.957217812538147, "learning_rate": 7.1236986517488275e-06, "loss": 0.8271, "step": 10746 }, { "epoch": 0.37960342444886946, "grad_norm": 1.6561496257781982, "learning_rate": 7.123180788441508e-06, "loss": 0.8302, "step": 10747 }, { "epoch": 0.3796387462525774, "grad_norm": 1.6325485706329346, "learning_rate": 7.122662897346399e-06, "loss": 0.8073, "step": 10748 }, { "epoch": 0.3796740680562853, "grad_norm": 1.4453572034835815, "learning_rate": 7.122144978470278e-06, "loss": 0.8029, "step": 10749 }, { "epoch": 0.3797093898599932, "grad_norm": 1.876183271408081, "learning_rate": 7.1216270318199195e-06, "loss": 0.8374, "step": 10750 }, { "epoch": 0.3797447116637011, "grad_norm": 1.6906334161758423, "learning_rate": 7.121109057402108e-06, "loss": 0.8198, "step": 10751 }, { "epoch": 0.379780033467409, "grad_norm": 1.4540483951568604, "learning_rate": 7.120591055223619e-06, "loss": 0.777, "step": 10752 }, { "epoch": 0.37981535527111693, "grad_norm": 1.7419108152389526, "learning_rate": 7.120073025291232e-06, "loss": 0.8451, "step": 10753 }, { "epoch": 0.37985067707482484, "grad_norm": 1.7114317417144775, "learning_rate": 7.119554967611731e-06, "loss": 0.7978, "step": 10754 }, { "epoch": 0.37988599887853275, "grad_norm": 1.648567795753479, "learning_rate": 7.119036882191889e-06, "loss": 0.8058, "step": 10755 }, { "epoch": 0.37992132068224066, "grad_norm": 1.5468835830688477, "learning_rate": 7.118518769038491e-06, "loss": 0.8382, "step": 10756 }, { "epoch": 0.37995664248594857, "grad_norm": 1.6307979822158813, "learning_rate": 7.11800062815832e-06, "loss": 0.8259, "step": 10757 }, { "epoch": 0.3799919642896564, "grad_norm": 1.6482223272323608, "learning_rate": 7.117482459558154e-06, "loss": 0.7938, "step": 10758 }, { "epoch": 0.38002728609336434, "grad_norm": 1.5773215293884277, "learning_rate": 7.116964263244774e-06, "loss": 0.8169, "step": 10759 }, { "epoch": 0.38006260789707225, "grad_norm": 1.4990438222885132, "learning_rate": 7.116446039224964e-06, "loss": 0.7878, "step": 10760 }, { "epoch": 0.38009792970078016, "grad_norm": 1.6494600772857666, "learning_rate": 7.115927787505508e-06, "loss": 0.838, "step": 10761 }, { "epoch": 0.38013325150448807, "grad_norm": 1.6320266723632812, "learning_rate": 7.115409508093186e-06, "loss": 0.8058, "step": 10762 }, { "epoch": 0.380168573308196, "grad_norm": 1.5469578504562378, "learning_rate": 7.114891200994782e-06, "loss": 0.8313, "step": 10763 }, { "epoch": 0.3802038951119039, "grad_norm": 1.510811686515808, "learning_rate": 7.114372866217081e-06, "loss": 0.8255, "step": 10764 }, { "epoch": 0.3802392169156118, "grad_norm": 1.7889920473098755, "learning_rate": 7.1138545037668626e-06, "loss": 0.8402, "step": 10765 }, { "epoch": 0.3802745387193197, "grad_norm": 1.7323734760284424, "learning_rate": 7.1133361136509145e-06, "loss": 0.8337, "step": 10766 }, { "epoch": 0.3803098605230276, "grad_norm": 1.6151477098464966, "learning_rate": 7.112817695876022e-06, "loss": 0.8371, "step": 10767 }, { "epoch": 0.38034518232673553, "grad_norm": 1.6298699378967285, "learning_rate": 7.112299250448967e-06, "loss": 0.7882, "step": 10768 }, { "epoch": 0.38038050413044344, "grad_norm": 1.6151450872421265, "learning_rate": 7.111780777376538e-06, "loss": 0.8396, "step": 10769 }, { "epoch": 0.38041582593415135, "grad_norm": 1.7361851930618286, "learning_rate": 7.111262276665518e-06, "loss": 0.8126, "step": 10770 }, { "epoch": 0.3804511477378592, "grad_norm": 1.0161528587341309, "learning_rate": 7.110743748322695e-06, "loss": 0.6038, "step": 10771 }, { "epoch": 0.3804864695415671, "grad_norm": 1.6495126485824585, "learning_rate": 7.110225192354855e-06, "loss": 0.8223, "step": 10772 }, { "epoch": 0.38052179134527503, "grad_norm": 1.655094861984253, "learning_rate": 7.1097066087687825e-06, "loss": 0.8146, "step": 10773 }, { "epoch": 0.38055711314898294, "grad_norm": 1.9067964553833008, "learning_rate": 7.109187997571269e-06, "loss": 0.818, "step": 10774 }, { "epoch": 0.38059243495269085, "grad_norm": 1.6178301572799683, "learning_rate": 7.108669358769098e-06, "loss": 0.8226, "step": 10775 }, { "epoch": 0.38062775675639876, "grad_norm": 1.5015252828598022, "learning_rate": 7.108150692369058e-06, "loss": 0.8212, "step": 10776 }, { "epoch": 0.38066307856010667, "grad_norm": 1.5538071393966675, "learning_rate": 7.10763199837794e-06, "loss": 0.8174, "step": 10777 }, { "epoch": 0.3806984003638146, "grad_norm": 1.6886881589889526, "learning_rate": 7.1071132768025285e-06, "loss": 0.8045, "step": 10778 }, { "epoch": 0.3807337221675225, "grad_norm": 1.7187862396240234, "learning_rate": 7.106594527649615e-06, "loss": 0.8253, "step": 10779 }, { "epoch": 0.3807690439712304, "grad_norm": 1.5901981592178345, "learning_rate": 7.106075750925989e-06, "loss": 0.8122, "step": 10780 }, { "epoch": 0.3808043657749383, "grad_norm": 1.7710270881652832, "learning_rate": 7.10555694663844e-06, "loss": 0.8209, "step": 10781 }, { "epoch": 0.3808396875786462, "grad_norm": 2.1161704063415527, "learning_rate": 7.105038114793756e-06, "loss": 0.8015, "step": 10782 }, { "epoch": 0.38087500938235413, "grad_norm": 2.568267583847046, "learning_rate": 7.104519255398731e-06, "loss": 0.7981, "step": 10783 }, { "epoch": 0.380910331186062, "grad_norm": 1.7596874237060547, "learning_rate": 7.104000368460151e-06, "loss": 0.783, "step": 10784 }, { "epoch": 0.3809456529897699, "grad_norm": 1.5742968320846558, "learning_rate": 7.1034814539848115e-06, "loss": 0.7885, "step": 10785 }, { "epoch": 0.3809809747934778, "grad_norm": 1.91454017162323, "learning_rate": 7.102962511979502e-06, "loss": 0.8375, "step": 10786 }, { "epoch": 0.3810162965971857, "grad_norm": 1.9145041704177856, "learning_rate": 7.102443542451013e-06, "loss": 0.8664, "step": 10787 }, { "epoch": 0.38105161840089363, "grad_norm": 1.8912159204483032, "learning_rate": 7.1019245454061406e-06, "loss": 0.837, "step": 10788 }, { "epoch": 0.38108694020460154, "grad_norm": 1.6144325733184814, "learning_rate": 7.101405520851672e-06, "loss": 0.8336, "step": 10789 }, { "epoch": 0.38112226200830945, "grad_norm": 1.7948044538497925, "learning_rate": 7.100886468794405e-06, "loss": 0.7956, "step": 10790 }, { "epoch": 0.38115758381201736, "grad_norm": 1.5358660221099854, "learning_rate": 7.100367389241131e-06, "loss": 0.794, "step": 10791 }, { "epoch": 0.3811929056157253, "grad_norm": 3.8709349632263184, "learning_rate": 7.0998482821986426e-06, "loss": 0.8262, "step": 10792 }, { "epoch": 0.3812282274194332, "grad_norm": 1.929593563079834, "learning_rate": 7.099329147673735e-06, "loss": 0.8105, "step": 10793 }, { "epoch": 0.3812635492231411, "grad_norm": 1.6906960010528564, "learning_rate": 7.0988099856732e-06, "loss": 0.8162, "step": 10794 }, { "epoch": 0.381298871026849, "grad_norm": 1.5754368305206299, "learning_rate": 7.098290796203837e-06, "loss": 0.82, "step": 10795 }, { "epoch": 0.3813341928305569, "grad_norm": 1.6434240341186523, "learning_rate": 7.0977715792724375e-06, "loss": 0.8314, "step": 10796 }, { "epoch": 0.3813695146342648, "grad_norm": 2.0082263946533203, "learning_rate": 7.097252334885798e-06, "loss": 0.8078, "step": 10797 }, { "epoch": 0.3814048364379727, "grad_norm": 1.798132061958313, "learning_rate": 7.096733063050715e-06, "loss": 0.8371, "step": 10798 }, { "epoch": 0.3814401582416806, "grad_norm": 1.7118427753448486, "learning_rate": 7.096213763773982e-06, "loss": 0.8037, "step": 10799 }, { "epoch": 0.3814754800453885, "grad_norm": 1.597165822982788, "learning_rate": 7.095694437062399e-06, "loss": 0.8061, "step": 10800 }, { "epoch": 0.3815108018490964, "grad_norm": 1.814231514930725, "learning_rate": 7.095175082922761e-06, "loss": 0.832, "step": 10801 }, { "epoch": 0.3815461236528043, "grad_norm": 1.5532265901565552, "learning_rate": 7.0946557013618645e-06, "loss": 0.827, "step": 10802 }, { "epoch": 0.38158144545651224, "grad_norm": 1.6564384698867798, "learning_rate": 7.094136292386509e-06, "loss": 0.8032, "step": 10803 }, { "epoch": 0.38161676726022015, "grad_norm": 1.5809913873672485, "learning_rate": 7.093616856003491e-06, "loss": 0.8346, "step": 10804 }, { "epoch": 0.38165208906392806, "grad_norm": 1.7023043632507324, "learning_rate": 7.0930973922196074e-06, "loss": 0.779, "step": 10805 }, { "epoch": 0.38168741086763597, "grad_norm": 1.679595947265625, "learning_rate": 7.092577901041662e-06, "loss": 0.8123, "step": 10806 }, { "epoch": 0.3817227326713439, "grad_norm": 1.9674583673477173, "learning_rate": 7.092058382476448e-06, "loss": 0.7885, "step": 10807 }, { "epoch": 0.3817580544750518, "grad_norm": 6.630181312561035, "learning_rate": 7.0915388365307666e-06, "loss": 0.7888, "step": 10808 }, { "epoch": 0.3817933762787597, "grad_norm": 1.6226948499679565, "learning_rate": 7.091019263211421e-06, "loss": 0.8501, "step": 10809 }, { "epoch": 0.38182869808246755, "grad_norm": 1.9053614139556885, "learning_rate": 7.090499662525205e-06, "loss": 0.8357, "step": 10810 }, { "epoch": 0.38186401988617547, "grad_norm": 1.6776670217514038, "learning_rate": 7.089980034478925e-06, "loss": 0.8157, "step": 10811 }, { "epoch": 0.3818993416898834, "grad_norm": 1.707252025604248, "learning_rate": 7.089460379079378e-06, "loss": 0.835, "step": 10812 }, { "epoch": 0.3819346634935913, "grad_norm": 1.6394824981689453, "learning_rate": 7.088940696333366e-06, "loss": 0.8161, "step": 10813 }, { "epoch": 0.3819699852972992, "grad_norm": 1.5900360345840454, "learning_rate": 7.088420986247691e-06, "loss": 0.8083, "step": 10814 }, { "epoch": 0.3820053071010071, "grad_norm": 1.59172785282135, "learning_rate": 7.087901248829155e-06, "loss": 0.789, "step": 10815 }, { "epoch": 0.382040628904715, "grad_norm": 1.6288138628005981, "learning_rate": 7.087381484084559e-06, "loss": 0.8313, "step": 10816 }, { "epoch": 0.38207595070842293, "grad_norm": 1.8436672687530518, "learning_rate": 7.086861692020708e-06, "loss": 0.8339, "step": 10817 }, { "epoch": 0.38211127251213084, "grad_norm": 1.6628559827804565, "learning_rate": 7.086341872644404e-06, "loss": 0.8212, "step": 10818 }, { "epoch": 0.38214659431583875, "grad_norm": 1.6210923194885254, "learning_rate": 7.085822025962448e-06, "loss": 0.803, "step": 10819 }, { "epoch": 0.38218191611954666, "grad_norm": 1.5376152992248535, "learning_rate": 7.085302151981646e-06, "loss": 0.7818, "step": 10820 }, { "epoch": 0.38221723792325457, "grad_norm": 1.626640796661377, "learning_rate": 7.0847822507088015e-06, "loss": 0.8097, "step": 10821 }, { "epoch": 0.3822525597269625, "grad_norm": 1.7336193323135376, "learning_rate": 7.084262322150719e-06, "loss": 0.844, "step": 10822 }, { "epoch": 0.38228788153067034, "grad_norm": 0.9302738904953003, "learning_rate": 7.083742366314203e-06, "loss": 0.5983, "step": 10823 }, { "epoch": 0.38232320333437825, "grad_norm": 1.589270830154419, "learning_rate": 7.0832223832060575e-06, "loss": 0.827, "step": 10824 }, { "epoch": 0.38235852513808616, "grad_norm": 1.6062508821487427, "learning_rate": 7.0827023728330905e-06, "loss": 0.7886, "step": 10825 }, { "epoch": 0.38239384694179407, "grad_norm": 2.610736608505249, "learning_rate": 7.082182335202104e-06, "loss": 0.8228, "step": 10826 }, { "epoch": 0.382429168745502, "grad_norm": 1.01430082321167, "learning_rate": 7.081662270319909e-06, "loss": 0.5704, "step": 10827 }, { "epoch": 0.3824644905492099, "grad_norm": 1.659482479095459, "learning_rate": 7.08114217819331e-06, "loss": 0.8173, "step": 10828 }, { "epoch": 0.3824998123529178, "grad_norm": 1.858568549156189, "learning_rate": 7.080622058829112e-06, "loss": 0.8252, "step": 10829 }, { "epoch": 0.3825351341566257, "grad_norm": 1.5706466436386108, "learning_rate": 7.0801019122341245e-06, "loss": 0.8249, "step": 10830 }, { "epoch": 0.3825704559603336, "grad_norm": 1.757698893547058, "learning_rate": 7.0795817384151524e-06, "loss": 0.8231, "step": 10831 }, { "epoch": 0.38260577776404153, "grad_norm": 1.6367050409317017, "learning_rate": 7.079061537379007e-06, "loss": 0.8045, "step": 10832 }, { "epoch": 0.38264109956774944, "grad_norm": 1.739519715309143, "learning_rate": 7.078541309132495e-06, "loss": 0.8662, "step": 10833 }, { "epoch": 0.38267642137145735, "grad_norm": 1.6652717590332031, "learning_rate": 7.078021053682424e-06, "loss": 0.8293, "step": 10834 }, { "epoch": 0.38271174317516526, "grad_norm": 1.5600084066390991, "learning_rate": 7.077500771035605e-06, "loss": 0.7943, "step": 10835 }, { "epoch": 0.3827470649788731, "grad_norm": 1.6154470443725586, "learning_rate": 7.0769804611988456e-06, "loss": 0.7987, "step": 10836 }, { "epoch": 0.38278238678258103, "grad_norm": 1.6792432069778442, "learning_rate": 7.0764601241789574e-06, "loss": 0.7818, "step": 10837 }, { "epoch": 0.38281770858628894, "grad_norm": 0.9610616564750671, "learning_rate": 7.07593975998275e-06, "loss": 0.5981, "step": 10838 }, { "epoch": 0.38285303038999685, "grad_norm": 1.9139378070831299, "learning_rate": 7.075419368617031e-06, "loss": 0.814, "step": 10839 }, { "epoch": 0.38288835219370476, "grad_norm": 1.7636487483978271, "learning_rate": 7.0748989500886145e-06, "loss": 0.8123, "step": 10840 }, { "epoch": 0.3829236739974127, "grad_norm": 1.6297715902328491, "learning_rate": 7.074378504404311e-06, "loss": 0.8085, "step": 10841 }, { "epoch": 0.3829589958011206, "grad_norm": 1.0524530410766602, "learning_rate": 7.073858031570931e-06, "loss": 0.5951, "step": 10842 }, { "epoch": 0.3829943176048285, "grad_norm": 1.594337821006775, "learning_rate": 7.0733375315952875e-06, "loss": 0.8082, "step": 10843 }, { "epoch": 0.3830296394085364, "grad_norm": 1.551060676574707, "learning_rate": 7.072817004484192e-06, "loss": 0.7766, "step": 10844 }, { "epoch": 0.3830649612122443, "grad_norm": 1.7465672492980957, "learning_rate": 7.072296450244456e-06, "loss": 0.8379, "step": 10845 }, { "epoch": 0.3831002830159522, "grad_norm": 1.6883677244186401, "learning_rate": 7.071775868882896e-06, "loss": 0.7967, "step": 10846 }, { "epoch": 0.38313560481966014, "grad_norm": 1.6433889865875244, "learning_rate": 7.071255260406321e-06, "loss": 0.8277, "step": 10847 }, { "epoch": 0.38317092662336805, "grad_norm": 1.799064040184021, "learning_rate": 7.070734624821547e-06, "loss": 0.8463, "step": 10848 }, { "epoch": 0.3832062484270759, "grad_norm": 1.82179594039917, "learning_rate": 7.070213962135388e-06, "loss": 0.8577, "step": 10849 }, { "epoch": 0.3832415702307838, "grad_norm": 1.6809576749801636, "learning_rate": 7.0696932723546565e-06, "loss": 0.8649, "step": 10850 }, { "epoch": 0.3832768920344917, "grad_norm": 1.6677665710449219, "learning_rate": 7.06917255548617e-06, "loss": 0.8093, "step": 10851 }, { "epoch": 0.38331221383819963, "grad_norm": 1.5866671800613403, "learning_rate": 7.06865181153674e-06, "loss": 0.8064, "step": 10852 }, { "epoch": 0.38334753564190754, "grad_norm": 2.4562220573425293, "learning_rate": 7.068131040513187e-06, "loss": 0.843, "step": 10853 }, { "epoch": 0.38338285744561545, "grad_norm": 1.7004430294036865, "learning_rate": 7.067610242422322e-06, "loss": 0.8207, "step": 10854 }, { "epoch": 0.38341817924932337, "grad_norm": 1.5075589418411255, "learning_rate": 7.067089417270962e-06, "loss": 0.7721, "step": 10855 }, { "epoch": 0.3834535010530313, "grad_norm": 1.650244116783142, "learning_rate": 7.066568565065926e-06, "loss": 0.7991, "step": 10856 }, { "epoch": 0.3834888228567392, "grad_norm": 1.5763325691223145, "learning_rate": 7.066047685814027e-06, "loss": 0.7898, "step": 10857 }, { "epoch": 0.3835241446604471, "grad_norm": 1.6045325994491577, "learning_rate": 7.065526779522086e-06, "loss": 0.8055, "step": 10858 }, { "epoch": 0.383559466464155, "grad_norm": 1.848165512084961, "learning_rate": 7.065005846196919e-06, "loss": 0.7829, "step": 10859 }, { "epoch": 0.3835947882678629, "grad_norm": 1.8766074180603027, "learning_rate": 7.064484885845342e-06, "loss": 0.8217, "step": 10860 }, { "epoch": 0.38363011007157083, "grad_norm": 1.5905754566192627, "learning_rate": 7.0639638984741756e-06, "loss": 0.7869, "step": 10861 }, { "epoch": 0.3836654318752787, "grad_norm": 1.6715939044952393, "learning_rate": 7.063442884090239e-06, "loss": 0.8249, "step": 10862 }, { "epoch": 0.3837007536789866, "grad_norm": 1.580747127532959, "learning_rate": 7.062921842700348e-06, "loss": 0.8352, "step": 10863 }, { "epoch": 0.3837360754826945, "grad_norm": 1.5566511154174805, "learning_rate": 7.062400774311325e-06, "loss": 0.831, "step": 10864 }, { "epoch": 0.3837713972864024, "grad_norm": 1.5159778594970703, "learning_rate": 7.061879678929987e-06, "loss": 0.7924, "step": 10865 }, { "epoch": 0.3838067190901103, "grad_norm": 1.5798087120056152, "learning_rate": 7.061358556563157e-06, "loss": 0.7609, "step": 10866 }, { "epoch": 0.38384204089381824, "grad_norm": 1.7632648944854736, "learning_rate": 7.060837407217654e-06, "loss": 0.8365, "step": 10867 }, { "epoch": 0.38387736269752615, "grad_norm": 1.079472303390503, "learning_rate": 7.060316230900295e-06, "loss": 0.5965, "step": 10868 }, { "epoch": 0.38391268450123406, "grad_norm": 1.4731311798095703, "learning_rate": 7.059795027617908e-06, "loss": 0.7787, "step": 10869 }, { "epoch": 0.38394800630494197, "grad_norm": 1.556926965713501, "learning_rate": 7.059273797377309e-06, "loss": 0.7805, "step": 10870 }, { "epoch": 0.3839833281086499, "grad_norm": 1.8475371599197388, "learning_rate": 7.058752540185322e-06, "loss": 0.8106, "step": 10871 }, { "epoch": 0.3840186499123578, "grad_norm": 1.5255711078643799, "learning_rate": 7.058231256048769e-06, "loss": 0.8016, "step": 10872 }, { "epoch": 0.3840539717160657, "grad_norm": 1.6606698036193848, "learning_rate": 7.057709944974472e-06, "loss": 0.8306, "step": 10873 }, { "epoch": 0.3840892935197736, "grad_norm": 1.6959106922149658, "learning_rate": 7.057188606969255e-06, "loss": 0.8, "step": 10874 }, { "epoch": 0.38412461532348147, "grad_norm": 1.6978657245635986, "learning_rate": 7.056667242039939e-06, "loss": 0.8506, "step": 10875 }, { "epoch": 0.3841599371271894, "grad_norm": 1.5160993337631226, "learning_rate": 7.05614585019335e-06, "loss": 0.771, "step": 10876 }, { "epoch": 0.3841952589308973, "grad_norm": 1.62827467918396, "learning_rate": 7.05562443143631e-06, "loss": 0.8234, "step": 10877 }, { "epoch": 0.3842305807346052, "grad_norm": 1.6197612285614014, "learning_rate": 7.055102985775644e-06, "loss": 0.7919, "step": 10878 }, { "epoch": 0.3842659025383131, "grad_norm": 1.5477056503295898, "learning_rate": 7.054581513218175e-06, "loss": 0.8342, "step": 10879 }, { "epoch": 0.384301224342021, "grad_norm": 1.601028561592102, "learning_rate": 7.05406001377073e-06, "loss": 0.8348, "step": 10880 }, { "epoch": 0.38433654614572893, "grad_norm": 1.6531816720962524, "learning_rate": 7.053538487440135e-06, "loss": 0.781, "step": 10881 }, { "epoch": 0.38437186794943684, "grad_norm": 1.6559171676635742, "learning_rate": 7.0530169342332135e-06, "loss": 0.7957, "step": 10882 }, { "epoch": 0.38440718975314475, "grad_norm": 1.6822998523712158, "learning_rate": 7.052495354156793e-06, "loss": 0.825, "step": 10883 }, { "epoch": 0.38444251155685266, "grad_norm": 1.5321033000946045, "learning_rate": 7.051973747217698e-06, "loss": 0.8074, "step": 10884 }, { "epoch": 0.3844778333605606, "grad_norm": 1.5205930471420288, "learning_rate": 7.051452113422756e-06, "loss": 0.8134, "step": 10885 }, { "epoch": 0.3845131551642685, "grad_norm": 1.5047805309295654, "learning_rate": 7.050930452778795e-06, "loss": 0.7961, "step": 10886 }, { "epoch": 0.3845484769679764, "grad_norm": 1.8851155042648315, "learning_rate": 7.050408765292642e-06, "loss": 0.8089, "step": 10887 }, { "epoch": 0.38458379877168425, "grad_norm": 1.7442615032196045, "learning_rate": 7.049887050971124e-06, "loss": 0.827, "step": 10888 }, { "epoch": 0.38461912057539216, "grad_norm": 1.825682520866394, "learning_rate": 7.04936530982107e-06, "loss": 0.8733, "step": 10889 }, { "epoch": 0.38465444237910007, "grad_norm": 1.6580621004104614, "learning_rate": 7.0488435418493085e-06, "loss": 0.8072, "step": 10890 }, { "epoch": 0.384689764182808, "grad_norm": 1.535565733909607, "learning_rate": 7.048321747062668e-06, "loss": 0.8277, "step": 10891 }, { "epoch": 0.3847250859865159, "grad_norm": 1.6681233644485474, "learning_rate": 7.047799925467977e-06, "loss": 0.8405, "step": 10892 }, { "epoch": 0.3847604077902238, "grad_norm": 1.811420202255249, "learning_rate": 7.047278077072066e-06, "loss": 0.8199, "step": 10893 }, { "epoch": 0.3847957295939317, "grad_norm": 1.7416815757751465, "learning_rate": 7.046756201881764e-06, "loss": 0.7965, "step": 10894 }, { "epoch": 0.3848310513976396, "grad_norm": 1.4767292737960815, "learning_rate": 7.0462342999039015e-06, "loss": 0.8322, "step": 10895 }, { "epoch": 0.38486637320134753, "grad_norm": 1.594852089881897, "learning_rate": 7.045712371145309e-06, "loss": 0.8132, "step": 10896 }, { "epoch": 0.38490169500505544, "grad_norm": 1.0739145278930664, "learning_rate": 7.045190415612817e-06, "loss": 0.6084, "step": 10897 }, { "epoch": 0.38493701680876335, "grad_norm": 1.5831003189086914, "learning_rate": 7.044668433313257e-06, "loss": 0.8304, "step": 10898 }, { "epoch": 0.38497233861247127, "grad_norm": 1.699789047241211, "learning_rate": 7.044146424253463e-06, "loss": 0.8162, "step": 10899 }, { "epoch": 0.3850076604161792, "grad_norm": 1.8646461963653564, "learning_rate": 7.043624388440262e-06, "loss": 0.8058, "step": 10900 }, { "epoch": 0.38504298221988703, "grad_norm": 1.5076979398727417, "learning_rate": 7.0431023258804935e-06, "loss": 0.7814, "step": 10901 }, { "epoch": 0.38507830402359494, "grad_norm": 1.5363733768463135, "learning_rate": 7.042580236580982e-06, "loss": 0.8379, "step": 10902 }, { "epoch": 0.38511362582730285, "grad_norm": 1.68241548538208, "learning_rate": 7.042058120548565e-06, "loss": 0.7994, "step": 10903 }, { "epoch": 0.38514894763101076, "grad_norm": 1.629049301147461, "learning_rate": 7.041535977790077e-06, "loss": 0.7846, "step": 10904 }, { "epoch": 0.3851842694347187, "grad_norm": 1.7327338457107544, "learning_rate": 7.041013808312347e-06, "loss": 0.852, "step": 10905 }, { "epoch": 0.3852195912384266, "grad_norm": 1.7062245607376099, "learning_rate": 7.040491612122214e-06, "loss": 0.764, "step": 10906 }, { "epoch": 0.3852549130421345, "grad_norm": 1.7696361541748047, "learning_rate": 7.039969389226511e-06, "loss": 0.8299, "step": 10907 }, { "epoch": 0.3852902348458424, "grad_norm": 1.8693580627441406, "learning_rate": 7.0394471396320694e-06, "loss": 0.7873, "step": 10908 }, { "epoch": 0.3853255566495503, "grad_norm": 1.6904017925262451, "learning_rate": 7.038924863345729e-06, "loss": 0.8176, "step": 10909 }, { "epoch": 0.3853608784532582, "grad_norm": 1.7324124574661255, "learning_rate": 7.038402560374322e-06, "loss": 0.7794, "step": 10910 }, { "epoch": 0.38539620025696614, "grad_norm": 1.7058136463165283, "learning_rate": 7.037880230724687e-06, "loss": 0.7879, "step": 10911 }, { "epoch": 0.38543152206067405, "grad_norm": 1.6831589937210083, "learning_rate": 7.037357874403659e-06, "loss": 0.8289, "step": 10912 }, { "epoch": 0.38546684386438196, "grad_norm": 1.535962700843811, "learning_rate": 7.036835491418073e-06, "loss": 0.8118, "step": 10913 }, { "epoch": 0.3855021656680898, "grad_norm": 1.9189050197601318, "learning_rate": 7.036313081774766e-06, "loss": 0.8468, "step": 10914 }, { "epoch": 0.3855374874717977, "grad_norm": 1.5151268243789673, "learning_rate": 7.035790645480577e-06, "loss": 0.8094, "step": 10915 }, { "epoch": 0.38557280927550563, "grad_norm": 1.5075714588165283, "learning_rate": 7.035268182542343e-06, "loss": 0.7743, "step": 10916 }, { "epoch": 0.38560813107921355, "grad_norm": 1.4319504499435425, "learning_rate": 7.034745692966902e-06, "loss": 0.8, "step": 10917 }, { "epoch": 0.38564345288292146, "grad_norm": 1.5436099767684937, "learning_rate": 7.03422317676109e-06, "loss": 0.8472, "step": 10918 }, { "epoch": 0.38567877468662937, "grad_norm": 1.841348648071289, "learning_rate": 7.033700633931749e-06, "loss": 0.8267, "step": 10919 }, { "epoch": 0.3857140964903373, "grad_norm": 2.9248595237731934, "learning_rate": 7.033178064485717e-06, "loss": 0.8171, "step": 10920 }, { "epoch": 0.3857494182940452, "grad_norm": 1.7201226949691772, "learning_rate": 7.032655468429832e-06, "loss": 0.7992, "step": 10921 }, { "epoch": 0.3857847400977531, "grad_norm": 1.615941047668457, "learning_rate": 7.032132845770935e-06, "loss": 0.8318, "step": 10922 }, { "epoch": 0.385820061901461, "grad_norm": 1.743506908416748, "learning_rate": 7.031610196515863e-06, "loss": 0.8078, "step": 10923 }, { "epoch": 0.3858553837051689, "grad_norm": 1.7790974378585815, "learning_rate": 7.03108752067146e-06, "loss": 0.8519, "step": 10924 }, { "epoch": 0.38589070550887683, "grad_norm": 1.555121898651123, "learning_rate": 7.0305648182445665e-06, "loss": 0.806, "step": 10925 }, { "epoch": 0.38592602731258474, "grad_norm": 1.7232600450515747, "learning_rate": 7.03004208924202e-06, "loss": 0.8216, "step": 10926 }, { "epoch": 0.3859613491162926, "grad_norm": 1.6526182889938354, "learning_rate": 7.029519333670667e-06, "loss": 0.792, "step": 10927 }, { "epoch": 0.3859966709200005, "grad_norm": 1.7337167263031006, "learning_rate": 7.028996551537345e-06, "loss": 0.8527, "step": 10928 }, { "epoch": 0.3860319927237084, "grad_norm": 1.7562133073806763, "learning_rate": 7.028473742848898e-06, "loss": 0.8001, "step": 10929 }, { "epoch": 0.3860673145274163, "grad_norm": 1.6459121704101562, "learning_rate": 7.027950907612168e-06, "loss": 0.8139, "step": 10930 }, { "epoch": 0.38610263633112424, "grad_norm": 1.5834109783172607, "learning_rate": 7.027428045833996e-06, "loss": 0.819, "step": 10931 }, { "epoch": 0.38613795813483215, "grad_norm": 1.7410968542099, "learning_rate": 7.026905157521229e-06, "loss": 0.8292, "step": 10932 }, { "epoch": 0.38617327993854006, "grad_norm": 1.6743794679641724, "learning_rate": 7.026382242680707e-06, "loss": 0.8187, "step": 10933 }, { "epoch": 0.38620860174224797, "grad_norm": 1.785470724105835, "learning_rate": 7.025859301319275e-06, "loss": 0.8424, "step": 10934 }, { "epoch": 0.3862439235459559, "grad_norm": 1.693966031074524, "learning_rate": 7.025336333443778e-06, "loss": 0.8318, "step": 10935 }, { "epoch": 0.3862792453496638, "grad_norm": 1.8062968254089355, "learning_rate": 7.02481333906106e-06, "loss": 0.7869, "step": 10936 }, { "epoch": 0.3863145671533717, "grad_norm": 1.5766311883926392, "learning_rate": 7.0242903181779655e-06, "loss": 0.7979, "step": 10937 }, { "epoch": 0.3863498889570796, "grad_norm": 1.4973721504211426, "learning_rate": 7.02376727080134e-06, "loss": 0.8021, "step": 10938 }, { "epoch": 0.3863852107607875, "grad_norm": 1.6377215385437012, "learning_rate": 7.023244196938029e-06, "loss": 0.8065, "step": 10939 }, { "epoch": 0.38642053256449543, "grad_norm": 1.7182382345199585, "learning_rate": 7.022721096594878e-06, "loss": 0.7999, "step": 10940 }, { "epoch": 0.3864558543682033, "grad_norm": 1.733599066734314, "learning_rate": 7.022197969778733e-06, "loss": 0.8262, "step": 10941 }, { "epoch": 0.3864911761719112, "grad_norm": 1.594765543937683, "learning_rate": 7.021674816496441e-06, "loss": 0.8009, "step": 10942 }, { "epoch": 0.3865264979756191, "grad_norm": 1.6921559572219849, "learning_rate": 7.0211516367548505e-06, "loss": 0.8156, "step": 10943 }, { "epoch": 0.386561819779327, "grad_norm": 1.6565051078796387, "learning_rate": 7.0206284305608054e-06, "loss": 0.7613, "step": 10944 }, { "epoch": 0.38659714158303493, "grad_norm": 1.5957599878311157, "learning_rate": 7.020105197921157e-06, "loss": 0.7958, "step": 10945 }, { "epoch": 0.38663246338674284, "grad_norm": 1.7547712326049805, "learning_rate": 7.01958193884275e-06, "loss": 0.8226, "step": 10946 }, { "epoch": 0.38666778519045075, "grad_norm": 1.651131510734558, "learning_rate": 7.019058653332434e-06, "loss": 0.854, "step": 10947 }, { "epoch": 0.38670310699415866, "grad_norm": 1.5156066417694092, "learning_rate": 7.01853534139706e-06, "loss": 0.7684, "step": 10948 }, { "epoch": 0.3867384287978666, "grad_norm": 1.6379668712615967, "learning_rate": 7.018012003043473e-06, "loss": 0.8136, "step": 10949 }, { "epoch": 0.3867737506015745, "grad_norm": 1.5766927003860474, "learning_rate": 7.017488638278525e-06, "loss": 0.8252, "step": 10950 }, { "epoch": 0.3868090724052824, "grad_norm": 1.9729796648025513, "learning_rate": 7.016965247109065e-06, "loss": 0.8446, "step": 10951 }, { "epoch": 0.3868443942089903, "grad_norm": 1.851024866104126, "learning_rate": 7.016441829541942e-06, "loss": 0.8544, "step": 10952 }, { "epoch": 0.3868797160126982, "grad_norm": 1.6068201065063477, "learning_rate": 7.01591838558401e-06, "loss": 0.8051, "step": 10953 }, { "epoch": 0.38691503781640607, "grad_norm": 1.6232842206954956, "learning_rate": 7.015394915242115e-06, "loss": 0.8096, "step": 10954 }, { "epoch": 0.386950359620114, "grad_norm": 1.7371547222137451, "learning_rate": 7.0148714185231104e-06, "loss": 0.8189, "step": 10955 }, { "epoch": 0.3869856814238219, "grad_norm": 1.798336148262024, "learning_rate": 7.0143478954338474e-06, "loss": 0.824, "step": 10956 }, { "epoch": 0.3870210032275298, "grad_norm": 1.7606797218322754, "learning_rate": 7.013824345981179e-06, "loss": 0.828, "step": 10957 }, { "epoch": 0.3870563250312377, "grad_norm": 1.6387664079666138, "learning_rate": 7.013300770171955e-06, "loss": 0.805, "step": 10958 }, { "epoch": 0.3870916468349456, "grad_norm": 1.5860353708267212, "learning_rate": 7.012777168013028e-06, "loss": 0.8149, "step": 10959 }, { "epoch": 0.38712696863865353, "grad_norm": 2.2375328540802, "learning_rate": 7.012253539511254e-06, "loss": 0.8011, "step": 10960 }, { "epoch": 0.38716229044236145, "grad_norm": 1.660356879234314, "learning_rate": 7.0117298846734815e-06, "loss": 0.8045, "step": 10961 }, { "epoch": 0.38719761224606936, "grad_norm": 1.5472204685211182, "learning_rate": 7.0112062035065685e-06, "loss": 0.7919, "step": 10962 }, { "epoch": 0.38723293404977727, "grad_norm": 1.6467596292495728, "learning_rate": 7.0106824960173655e-06, "loss": 0.8168, "step": 10963 }, { "epoch": 0.3872682558534852, "grad_norm": 1.5045815706253052, "learning_rate": 7.0101587622127284e-06, "loss": 0.8398, "step": 10964 }, { "epoch": 0.3873035776571931, "grad_norm": 1.7479710578918457, "learning_rate": 7.0096350020995114e-06, "loss": 0.8387, "step": 10965 }, { "epoch": 0.387338899460901, "grad_norm": 1.798864722251892, "learning_rate": 7.0091112156845696e-06, "loss": 0.8274, "step": 10966 }, { "epoch": 0.38737422126460885, "grad_norm": 1.7030024528503418, "learning_rate": 7.008587402974758e-06, "loss": 0.8353, "step": 10967 }, { "epoch": 0.38740954306831676, "grad_norm": 1.9260282516479492, "learning_rate": 7.008063563976931e-06, "loss": 0.8129, "step": 10968 }, { "epoch": 0.3874448648720247, "grad_norm": 1.8173253536224365, "learning_rate": 7.0075396986979476e-06, "loss": 0.8477, "step": 10969 }, { "epoch": 0.3874801866757326, "grad_norm": 1.6963530778884888, "learning_rate": 7.007015807144659e-06, "loss": 0.839, "step": 10970 }, { "epoch": 0.3875155084794405, "grad_norm": 1.4705653190612793, "learning_rate": 7.006491889323926e-06, "loss": 0.7604, "step": 10971 }, { "epoch": 0.3875508302831484, "grad_norm": 1.6110689640045166, "learning_rate": 7.005967945242605e-06, "loss": 0.7976, "step": 10972 }, { "epoch": 0.3875861520868563, "grad_norm": 1.6675140857696533, "learning_rate": 7.005443974907552e-06, "loss": 0.7877, "step": 10973 }, { "epoch": 0.3876214738905642, "grad_norm": 1.216681718826294, "learning_rate": 7.0049199783256265e-06, "loss": 0.5939, "step": 10974 }, { "epoch": 0.38765679569427214, "grad_norm": 1.6586626768112183, "learning_rate": 7.004395955503685e-06, "loss": 0.7715, "step": 10975 }, { "epoch": 0.38769211749798005, "grad_norm": 1.7036174535751343, "learning_rate": 7.003871906448584e-06, "loss": 0.8443, "step": 10976 }, { "epoch": 0.38772743930168796, "grad_norm": 1.6220885515213013, "learning_rate": 7.0033478311671856e-06, "loss": 0.8302, "step": 10977 }, { "epoch": 0.38776276110539587, "grad_norm": 1.5555739402770996, "learning_rate": 7.002823729666346e-06, "loss": 0.8304, "step": 10978 }, { "epoch": 0.3877980829091038, "grad_norm": 1.623547911643982, "learning_rate": 7.002299601952928e-06, "loss": 0.8185, "step": 10979 }, { "epoch": 0.38783340471281164, "grad_norm": 1.8779828548431396, "learning_rate": 7.0017754480337885e-06, "loss": 0.8046, "step": 10980 }, { "epoch": 0.38786872651651955, "grad_norm": 1.6050430536270142, "learning_rate": 7.001251267915787e-06, "loss": 0.8251, "step": 10981 }, { "epoch": 0.38790404832022746, "grad_norm": 1.5847667455673218, "learning_rate": 7.0007270616057865e-06, "loss": 0.7963, "step": 10982 }, { "epoch": 0.38793937012393537, "grad_norm": 1.7265703678131104, "learning_rate": 7.000202829110646e-06, "loss": 0.8273, "step": 10983 }, { "epoch": 0.3879746919276433, "grad_norm": 1.7193539142608643, "learning_rate": 6.9996785704372275e-06, "loss": 0.8238, "step": 10984 }, { "epoch": 0.3880100137313512, "grad_norm": 1.7458171844482422, "learning_rate": 6.999154285592391e-06, "loss": 0.8234, "step": 10985 }, { "epoch": 0.3880453355350591, "grad_norm": 1.6262093782424927, "learning_rate": 6.998629974582999e-06, "loss": 0.8132, "step": 10986 }, { "epoch": 0.388080657338767, "grad_norm": 2.4482262134552, "learning_rate": 6.998105637415913e-06, "loss": 0.8111, "step": 10987 }, { "epoch": 0.3881159791424749, "grad_norm": 1.8538779020309448, "learning_rate": 6.997581274097998e-06, "loss": 0.7972, "step": 10988 }, { "epoch": 0.38815130094618283, "grad_norm": 1.625681757926941, "learning_rate": 6.997056884636112e-06, "loss": 0.7528, "step": 10989 }, { "epoch": 0.38818662274989074, "grad_norm": 1.50747811794281, "learning_rate": 6.996532469037123e-06, "loss": 0.7716, "step": 10990 }, { "epoch": 0.38822194455359865, "grad_norm": 1.7406097650527954, "learning_rate": 6.996008027307892e-06, "loss": 0.8015, "step": 10991 }, { "epoch": 0.38825726635730656, "grad_norm": 1.9409816265106201, "learning_rate": 6.9954835594552814e-06, "loss": 0.8145, "step": 10992 }, { "epoch": 0.3882925881610144, "grad_norm": 1.7453408241271973, "learning_rate": 6.99495906548616e-06, "loss": 0.8012, "step": 10993 }, { "epoch": 0.38832790996472233, "grad_norm": 1.0093083381652832, "learning_rate": 6.994434545407387e-06, "loss": 0.6113, "step": 10994 }, { "epoch": 0.38836323176843024, "grad_norm": 1.5517492294311523, "learning_rate": 6.993909999225832e-06, "loss": 0.8424, "step": 10995 }, { "epoch": 0.38839855357213815, "grad_norm": 1.601849913597107, "learning_rate": 6.993385426948356e-06, "loss": 0.7795, "step": 10996 }, { "epoch": 0.38843387537584606, "grad_norm": 1.6927493810653687, "learning_rate": 6.9928608285818254e-06, "loss": 0.8059, "step": 10997 }, { "epoch": 0.38846919717955397, "grad_norm": 1.7161390781402588, "learning_rate": 6.992336204133109e-06, "loss": 0.7969, "step": 10998 }, { "epoch": 0.3885045189832619, "grad_norm": 1.6976509094238281, "learning_rate": 6.991811553609069e-06, "loss": 0.8341, "step": 10999 }, { "epoch": 0.3885398407869698, "grad_norm": 1.5378912687301636, "learning_rate": 6.991286877016574e-06, "loss": 0.8087, "step": 11000 }, { "epoch": 0.3885751625906777, "grad_norm": 1.6307599544525146, "learning_rate": 6.990762174362492e-06, "loss": 0.8534, "step": 11001 }, { "epoch": 0.3886104843943856, "grad_norm": 1.5027596950531006, "learning_rate": 6.990237445653688e-06, "loss": 0.8008, "step": 11002 }, { "epoch": 0.3886458061980935, "grad_norm": 1.638787865638733, "learning_rate": 6.989712690897031e-06, "loss": 0.8147, "step": 11003 }, { "epoch": 0.38868112800180143, "grad_norm": 1.6391501426696777, "learning_rate": 6.989187910099389e-06, "loss": 0.7966, "step": 11004 }, { "epoch": 0.38871644980550935, "grad_norm": 1.730345606803894, "learning_rate": 6.988663103267628e-06, "loss": 0.8397, "step": 11005 }, { "epoch": 0.3887517716092172, "grad_norm": 1.5409648418426514, "learning_rate": 6.988138270408619e-06, "loss": 0.8076, "step": 11006 }, { "epoch": 0.3887870934129251, "grad_norm": 0.9521157145500183, "learning_rate": 6.987613411529228e-06, "loss": 0.6186, "step": 11007 }, { "epoch": 0.388822415216633, "grad_norm": 1.603994369506836, "learning_rate": 6.98708852663633e-06, "loss": 0.7725, "step": 11008 }, { "epoch": 0.38885773702034093, "grad_norm": 1.4481409788131714, "learning_rate": 6.986563615736787e-06, "loss": 0.779, "step": 11009 }, { "epoch": 0.38889305882404884, "grad_norm": 1.7153345346450806, "learning_rate": 6.9860386788374755e-06, "loss": 0.8015, "step": 11010 }, { "epoch": 0.38892838062775675, "grad_norm": 1.9141449928283691, "learning_rate": 6.985513715945263e-06, "loss": 0.8258, "step": 11011 }, { "epoch": 0.38896370243146466, "grad_norm": 1.9309028387069702, "learning_rate": 6.984988727067019e-06, "loss": 0.8263, "step": 11012 }, { "epoch": 0.3889990242351726, "grad_norm": 1.5761947631835938, "learning_rate": 6.984463712209617e-06, "loss": 0.7917, "step": 11013 }, { "epoch": 0.3890343460388805, "grad_norm": 1.819451093673706, "learning_rate": 6.983938671379926e-06, "loss": 0.8336, "step": 11014 }, { "epoch": 0.3890696678425884, "grad_norm": 1.5471525192260742, "learning_rate": 6.983413604584819e-06, "loss": 0.8072, "step": 11015 }, { "epoch": 0.3891049896462963, "grad_norm": 1.6206121444702148, "learning_rate": 6.982888511831167e-06, "loss": 0.81, "step": 11016 }, { "epoch": 0.3891403114500042, "grad_norm": 1.8956314325332642, "learning_rate": 6.982363393125844e-06, "loss": 0.7786, "step": 11017 }, { "epoch": 0.3891756332537121, "grad_norm": 1.739010214805603, "learning_rate": 6.98183824847572e-06, "loss": 0.8353, "step": 11018 }, { "epoch": 0.38921095505742, "grad_norm": 1.7114313840866089, "learning_rate": 6.9813130778876705e-06, "loss": 0.7853, "step": 11019 }, { "epoch": 0.3892462768611279, "grad_norm": 1.6705321073532104, "learning_rate": 6.980787881368568e-06, "loss": 0.844, "step": 11020 }, { "epoch": 0.3892815986648358, "grad_norm": 1.469758152961731, "learning_rate": 6.980262658925287e-06, "loss": 0.8092, "step": 11021 }, { "epoch": 0.3893169204685437, "grad_norm": 1.441332221031189, "learning_rate": 6.9797374105647e-06, "loss": 0.7907, "step": 11022 }, { "epoch": 0.3893522422722516, "grad_norm": 1.7922285795211792, "learning_rate": 6.979212136293679e-06, "loss": 0.7932, "step": 11023 }, { "epoch": 0.38938756407595954, "grad_norm": 4.627442359924316, "learning_rate": 6.978686836119105e-06, "loss": 0.8236, "step": 11024 }, { "epoch": 0.38942288587966745, "grad_norm": 1.64902925491333, "learning_rate": 6.97816151004785e-06, "loss": 0.8457, "step": 11025 }, { "epoch": 0.38945820768337536, "grad_norm": 1.5080339908599854, "learning_rate": 6.977636158086786e-06, "loss": 0.7957, "step": 11026 }, { "epoch": 0.38949352948708327, "grad_norm": 1.5769752264022827, "learning_rate": 6.977110780242793e-06, "loss": 0.7989, "step": 11027 }, { "epoch": 0.3895288512907912, "grad_norm": 1.8020950555801392, "learning_rate": 6.976585376522746e-06, "loss": 0.8681, "step": 11028 }, { "epoch": 0.3895641730944991, "grad_norm": 1.6985365152359009, "learning_rate": 6.976059946933521e-06, "loss": 0.8083, "step": 11029 }, { "epoch": 0.389599494898207, "grad_norm": 1.5902875661849976, "learning_rate": 6.975534491481996e-06, "loss": 0.82, "step": 11030 }, { "epoch": 0.3896348167019149, "grad_norm": 1.6772572994232178, "learning_rate": 6.975009010175046e-06, "loss": 0.8326, "step": 11031 }, { "epoch": 0.38967013850562277, "grad_norm": 1.6433029174804688, "learning_rate": 6.974483503019549e-06, "loss": 0.8216, "step": 11032 }, { "epoch": 0.3897054603093307, "grad_norm": 1.7551003694534302, "learning_rate": 6.973957970022385e-06, "loss": 0.85, "step": 11033 }, { "epoch": 0.3897407821130386, "grad_norm": 1.6439827680587769, "learning_rate": 6.973432411190427e-06, "loss": 0.7996, "step": 11034 }, { "epoch": 0.3897761039167465, "grad_norm": 1.6623234748840332, "learning_rate": 6.972906826530559e-06, "loss": 0.8239, "step": 11035 }, { "epoch": 0.3898114257204544, "grad_norm": 1.6902300119400024, "learning_rate": 6.972381216049655e-06, "loss": 0.8272, "step": 11036 }, { "epoch": 0.3898467475241623, "grad_norm": 2.008272171020508, "learning_rate": 6.971855579754598e-06, "loss": 0.8082, "step": 11037 }, { "epoch": 0.38988206932787023, "grad_norm": 1.521384835243225, "learning_rate": 6.971329917652267e-06, "loss": 0.793, "step": 11038 }, { "epoch": 0.38991739113157814, "grad_norm": 1.6680787801742554, "learning_rate": 6.970804229749538e-06, "loss": 0.8063, "step": 11039 }, { "epoch": 0.38995271293528605, "grad_norm": 1.797725796699524, "learning_rate": 6.970278516053297e-06, "loss": 0.8171, "step": 11040 }, { "epoch": 0.38998803473899396, "grad_norm": 1.7236204147338867, "learning_rate": 6.969752776570418e-06, "loss": 0.825, "step": 11041 }, { "epoch": 0.39002335654270187, "grad_norm": 1.1662721633911133, "learning_rate": 6.969227011307786e-06, "loss": 0.6193, "step": 11042 }, { "epoch": 0.3900586783464098, "grad_norm": 1.8432730436325073, "learning_rate": 6.968701220272283e-06, "loss": 0.8353, "step": 11043 }, { "epoch": 0.3900940001501177, "grad_norm": 1.5485998392105103, "learning_rate": 6.968175403470785e-06, "loss": 0.7686, "step": 11044 }, { "epoch": 0.39012932195382555, "grad_norm": 1.7399753332138062, "learning_rate": 6.9676495609101795e-06, "loss": 0.8586, "step": 11045 }, { "epoch": 0.39016464375753346, "grad_norm": 1.6340432167053223, "learning_rate": 6.967123692597346e-06, "loss": 0.7981, "step": 11046 }, { "epoch": 0.39019996556124137, "grad_norm": 1.4856566190719604, "learning_rate": 6.966597798539167e-06, "loss": 0.8259, "step": 11047 }, { "epoch": 0.3902352873649493, "grad_norm": 1.6917812824249268, "learning_rate": 6.966071878742526e-06, "loss": 0.8116, "step": 11048 }, { "epoch": 0.3902706091686572, "grad_norm": 1.5241779088974, "learning_rate": 6.965545933214306e-06, "loss": 0.7997, "step": 11049 }, { "epoch": 0.3903059309723651, "grad_norm": 1.6139438152313232, "learning_rate": 6.96501996196139e-06, "loss": 0.7931, "step": 11050 }, { "epoch": 0.390341252776073, "grad_norm": 1.4669296741485596, "learning_rate": 6.964493964990663e-06, "loss": 0.771, "step": 11051 }, { "epoch": 0.3903765745797809, "grad_norm": 1.5476471185684204, "learning_rate": 6.963967942309007e-06, "loss": 0.801, "step": 11052 }, { "epoch": 0.39041189638348883, "grad_norm": 1.7012213468551636, "learning_rate": 6.963441893923309e-06, "loss": 0.8371, "step": 11053 }, { "epoch": 0.39044721818719674, "grad_norm": 1.7345300912857056, "learning_rate": 6.962915819840452e-06, "loss": 0.763, "step": 11054 }, { "epoch": 0.39048253999090465, "grad_norm": 2.09942364692688, "learning_rate": 6.962389720067321e-06, "loss": 0.8457, "step": 11055 }, { "epoch": 0.39051786179461256, "grad_norm": 2.171602964401245, "learning_rate": 6.961863594610802e-06, "loss": 0.8365, "step": 11056 }, { "epoch": 0.3905531835983205, "grad_norm": 1.6227751970291138, "learning_rate": 6.961337443477783e-06, "loss": 0.8331, "step": 11057 }, { "epoch": 0.39058850540202833, "grad_norm": 1.6684640645980835, "learning_rate": 6.9608112666751474e-06, "loss": 0.7942, "step": 11058 }, { "epoch": 0.39062382720573624, "grad_norm": 1.7535629272460938, "learning_rate": 6.960285064209783e-06, "loss": 0.8048, "step": 11059 }, { "epoch": 0.39065914900944415, "grad_norm": 1.6635481119155884, "learning_rate": 6.959758836088575e-06, "loss": 0.8616, "step": 11060 }, { "epoch": 0.39069447081315206, "grad_norm": 1.6720528602600098, "learning_rate": 6.959232582318412e-06, "loss": 0.8056, "step": 11061 }, { "epoch": 0.39072979261685997, "grad_norm": 1.6722302436828613, "learning_rate": 6.958706302906182e-06, "loss": 0.7921, "step": 11062 }, { "epoch": 0.3907651144205679, "grad_norm": 1.646257758140564, "learning_rate": 6.95817999785877e-06, "loss": 0.8261, "step": 11063 }, { "epoch": 0.3908004362242758, "grad_norm": 1.5266575813293457, "learning_rate": 6.9576536671830684e-06, "loss": 0.7568, "step": 11064 }, { "epoch": 0.3908357580279837, "grad_norm": 1.5343680381774902, "learning_rate": 6.957127310885963e-06, "loss": 0.8289, "step": 11065 }, { "epoch": 0.3908710798316916, "grad_norm": 1.4455885887145996, "learning_rate": 6.956600928974343e-06, "loss": 0.8116, "step": 11066 }, { "epoch": 0.3909064016353995, "grad_norm": 1.7439727783203125, "learning_rate": 6.9560745214550985e-06, "loss": 0.8104, "step": 11067 }, { "epoch": 0.39094172343910744, "grad_norm": 1.7528884410858154, "learning_rate": 6.955548088335118e-06, "loss": 0.7996, "step": 11068 }, { "epoch": 0.39097704524281535, "grad_norm": 1.6728215217590332, "learning_rate": 6.955021629621293e-06, "loss": 0.8567, "step": 11069 }, { "epoch": 0.39101236704652326, "grad_norm": 1.5189048051834106, "learning_rate": 6.95449514532051e-06, "loss": 0.8004, "step": 11070 }, { "epoch": 0.3910476888502311, "grad_norm": 1.6125168800354004, "learning_rate": 6.953968635439663e-06, "loss": 0.8298, "step": 11071 }, { "epoch": 0.391083010653939, "grad_norm": 1.6025813817977905, "learning_rate": 6.953442099985642e-06, "loss": 0.8318, "step": 11072 }, { "epoch": 0.39111833245764693, "grad_norm": 1.695684552192688, "learning_rate": 6.952915538965338e-06, "loss": 0.8068, "step": 11073 }, { "epoch": 0.39115365426135484, "grad_norm": 1.9909639358520508, "learning_rate": 6.9523889523856425e-06, "loss": 0.8507, "step": 11074 }, { "epoch": 0.39118897606506275, "grad_norm": 1.5464047193527222, "learning_rate": 6.951862340253448e-06, "loss": 0.7939, "step": 11075 }, { "epoch": 0.39122429786877067, "grad_norm": 1.6561989784240723, "learning_rate": 6.951335702575644e-06, "loss": 0.8134, "step": 11076 }, { "epoch": 0.3912596196724786, "grad_norm": 1.8501185178756714, "learning_rate": 6.9508090393591274e-06, "loss": 0.8081, "step": 11077 }, { "epoch": 0.3912949414761865, "grad_norm": 1.7514899969100952, "learning_rate": 6.950282350610789e-06, "loss": 0.8055, "step": 11078 }, { "epoch": 0.3913302632798944, "grad_norm": 1.6548975706100464, "learning_rate": 6.949755636337521e-06, "loss": 0.8035, "step": 11079 }, { "epoch": 0.3913655850836023, "grad_norm": 1.6624881029129028, "learning_rate": 6.949228896546216e-06, "loss": 0.8175, "step": 11080 }, { "epoch": 0.3914009068873102, "grad_norm": 1.627714991569519, "learning_rate": 6.948702131243772e-06, "loss": 0.7969, "step": 11081 }, { "epoch": 0.39143622869101813, "grad_norm": 1.6410104036331177, "learning_rate": 6.94817534043708e-06, "loss": 0.7953, "step": 11082 }, { "epoch": 0.39147155049472604, "grad_norm": 1.7261520624160767, "learning_rate": 6.947648524133035e-06, "loss": 0.8095, "step": 11083 }, { "epoch": 0.3915068722984339, "grad_norm": 1.5888010263442993, "learning_rate": 6.947121682338532e-06, "loss": 0.787, "step": 11084 }, { "epoch": 0.3915421941021418, "grad_norm": 1.7132041454315186, "learning_rate": 6.946594815060468e-06, "loss": 0.8002, "step": 11085 }, { "epoch": 0.3915775159058497, "grad_norm": 1.8599867820739746, "learning_rate": 6.946067922305736e-06, "loss": 0.7944, "step": 11086 }, { "epoch": 0.3916128377095576, "grad_norm": 1.668791651725769, "learning_rate": 6.945541004081233e-06, "loss": 0.8226, "step": 11087 }, { "epoch": 0.39164815951326554, "grad_norm": 1.613887906074524, "learning_rate": 6.945014060393855e-06, "loss": 0.8211, "step": 11088 }, { "epoch": 0.39168348131697345, "grad_norm": 1.5764262676239014, "learning_rate": 6.944487091250498e-06, "loss": 0.772, "step": 11089 }, { "epoch": 0.39171880312068136, "grad_norm": 1.8538095951080322, "learning_rate": 6.94396009665806e-06, "loss": 0.8137, "step": 11090 }, { "epoch": 0.39175412492438927, "grad_norm": 1.638340950012207, "learning_rate": 6.943433076623438e-06, "loss": 0.8464, "step": 11091 }, { "epoch": 0.3917894467280972, "grad_norm": 1.8331351280212402, "learning_rate": 6.942906031153528e-06, "loss": 0.7854, "step": 11092 }, { "epoch": 0.3918247685318051, "grad_norm": 1.677306056022644, "learning_rate": 6.94237896025523e-06, "loss": 0.8133, "step": 11093 }, { "epoch": 0.391860090335513, "grad_norm": 1.5954684019088745, "learning_rate": 6.94185186393544e-06, "loss": 0.7931, "step": 11094 }, { "epoch": 0.3918954121392209, "grad_norm": 1.7605276107788086, "learning_rate": 6.941324742201061e-06, "loss": 0.8269, "step": 11095 }, { "epoch": 0.3919307339429288, "grad_norm": 1.694580078125, "learning_rate": 6.940797595058988e-06, "loss": 0.8186, "step": 11096 }, { "epoch": 0.3919660557466367, "grad_norm": 1.7924599647521973, "learning_rate": 6.9402704225161174e-06, "loss": 0.8135, "step": 11097 }, { "epoch": 0.3920013775503446, "grad_norm": 1.7121315002441406, "learning_rate": 6.939743224579356e-06, "loss": 0.795, "step": 11098 }, { "epoch": 0.3920366993540525, "grad_norm": 1.7507174015045166, "learning_rate": 6.939216001255598e-06, "loss": 0.807, "step": 11099 }, { "epoch": 0.3920720211577604, "grad_norm": 1.664644718170166, "learning_rate": 6.938688752551745e-06, "loss": 0.8159, "step": 11100 }, { "epoch": 0.3921073429614683, "grad_norm": 1.4836643934249878, "learning_rate": 6.9381614784746994e-06, "loss": 0.7832, "step": 11101 }, { "epoch": 0.39214266476517623, "grad_norm": 1.6166841983795166, "learning_rate": 6.93763417903136e-06, "loss": 0.8109, "step": 11102 }, { "epoch": 0.39217798656888414, "grad_norm": 1.7120521068572998, "learning_rate": 6.937106854228629e-06, "loss": 0.8127, "step": 11103 }, { "epoch": 0.39221330837259205, "grad_norm": 1.9575413465499878, "learning_rate": 6.936579504073409e-06, "loss": 0.8677, "step": 11104 }, { "epoch": 0.39224863017629996, "grad_norm": 1.6766010522842407, "learning_rate": 6.936052128572599e-06, "loss": 0.8071, "step": 11105 }, { "epoch": 0.3922839519800079, "grad_norm": 1.7382656335830688, "learning_rate": 6.935524727733103e-06, "loss": 0.7907, "step": 11106 }, { "epoch": 0.3923192737837158, "grad_norm": 1.6350589990615845, "learning_rate": 6.9349973015618225e-06, "loss": 0.8119, "step": 11107 }, { "epoch": 0.3923545955874237, "grad_norm": 1.554760456085205, "learning_rate": 6.934469850065662e-06, "loss": 0.8253, "step": 11108 }, { "epoch": 0.3923899173911316, "grad_norm": 1.7959885597229004, "learning_rate": 6.933942373251523e-06, "loss": 0.8006, "step": 11109 }, { "epoch": 0.39242523919483946, "grad_norm": 1.5461063385009766, "learning_rate": 6.933414871126309e-06, "loss": 0.801, "step": 11110 }, { "epoch": 0.39246056099854737, "grad_norm": 1.0195873975753784, "learning_rate": 6.932887343696926e-06, "loss": 0.5812, "step": 11111 }, { "epoch": 0.3924958828022553, "grad_norm": 1.6481406688690186, "learning_rate": 6.932359790970278e-06, "loss": 0.8304, "step": 11112 }, { "epoch": 0.3925312046059632, "grad_norm": 1.8894286155700684, "learning_rate": 6.931832212953264e-06, "loss": 0.8165, "step": 11113 }, { "epoch": 0.3925665264096711, "grad_norm": 1.5855392217636108, "learning_rate": 6.931304609652798e-06, "loss": 0.8438, "step": 11114 }, { "epoch": 0.392601848213379, "grad_norm": 1.5806245803833008, "learning_rate": 6.930776981075777e-06, "loss": 0.7977, "step": 11115 }, { "epoch": 0.3926371700170869, "grad_norm": 1.7152316570281982, "learning_rate": 6.930249327229112e-06, "loss": 0.7858, "step": 11116 }, { "epoch": 0.39267249182079483, "grad_norm": 1.5852466821670532, "learning_rate": 6.929721648119706e-06, "loss": 0.8046, "step": 11117 }, { "epoch": 0.39270781362450274, "grad_norm": 1.5731651782989502, "learning_rate": 6.929193943754465e-06, "loss": 0.8031, "step": 11118 }, { "epoch": 0.39274313542821065, "grad_norm": 1.7503511905670166, "learning_rate": 6.928666214140297e-06, "loss": 0.7942, "step": 11119 }, { "epoch": 0.39277845723191857, "grad_norm": 1.770598292350769, "learning_rate": 6.928138459284108e-06, "loss": 0.7941, "step": 11120 }, { "epoch": 0.3928137790356265, "grad_norm": 3.5709331035614014, "learning_rate": 6.927610679192804e-06, "loss": 0.7583, "step": 11121 }, { "epoch": 0.3928491008393344, "grad_norm": 1.702494502067566, "learning_rate": 6.927082873873296e-06, "loss": 0.8138, "step": 11122 }, { "epoch": 0.39288442264304224, "grad_norm": 1.8424115180969238, "learning_rate": 6.926555043332489e-06, "loss": 0.8258, "step": 11123 }, { "epoch": 0.39291974444675015, "grad_norm": 1.6616145372390747, "learning_rate": 6.926027187577291e-06, "loss": 0.8346, "step": 11124 }, { "epoch": 0.39295506625045806, "grad_norm": 1.697609782218933, "learning_rate": 6.925499306614613e-06, "loss": 0.8424, "step": 11125 }, { "epoch": 0.392990388054166, "grad_norm": 1.8154271841049194, "learning_rate": 6.924971400451359e-06, "loss": 0.8265, "step": 11126 }, { "epoch": 0.3930257098578739, "grad_norm": 1.6239696741104126, "learning_rate": 6.924443469094444e-06, "loss": 0.7766, "step": 11127 }, { "epoch": 0.3930610316615818, "grad_norm": 1.537467360496521, "learning_rate": 6.9239155125507716e-06, "loss": 0.8192, "step": 11128 }, { "epoch": 0.3930963534652897, "grad_norm": 1.6352723836898804, "learning_rate": 6.923387530827257e-06, "loss": 0.7976, "step": 11129 }, { "epoch": 0.3931316752689976, "grad_norm": 1.625956654548645, "learning_rate": 6.922859523930808e-06, "loss": 0.8441, "step": 11130 }, { "epoch": 0.3931669970727055, "grad_norm": 2.0455405712127686, "learning_rate": 6.922331491868334e-06, "loss": 0.8041, "step": 11131 }, { "epoch": 0.39320231887641344, "grad_norm": 1.8371714353561401, "learning_rate": 6.921803434646748e-06, "loss": 0.8138, "step": 11132 }, { "epoch": 0.39323764068012135, "grad_norm": 1.606138825416565, "learning_rate": 6.921275352272958e-06, "loss": 0.8248, "step": 11133 }, { "epoch": 0.39327296248382926, "grad_norm": 1.6538584232330322, "learning_rate": 6.920747244753879e-06, "loss": 0.7879, "step": 11134 }, { "epoch": 0.39330828428753717, "grad_norm": 1.5491315126419067, "learning_rate": 6.920219112096421e-06, "loss": 0.7724, "step": 11135 }, { "epoch": 0.393343606091245, "grad_norm": 1.6236048936843872, "learning_rate": 6.919690954307495e-06, "loss": 0.8248, "step": 11136 }, { "epoch": 0.39337892789495293, "grad_norm": 1.5928683280944824, "learning_rate": 6.919162771394017e-06, "loss": 0.7907, "step": 11137 }, { "epoch": 0.39341424969866085, "grad_norm": 1.587537407875061, "learning_rate": 6.918634563362896e-06, "loss": 0.8095, "step": 11138 }, { "epoch": 0.39344957150236876, "grad_norm": 1.8005601167678833, "learning_rate": 6.918106330221045e-06, "loss": 0.7947, "step": 11139 }, { "epoch": 0.39348489330607667, "grad_norm": 1.6928080320358276, "learning_rate": 6.917578071975381e-06, "loss": 0.8496, "step": 11140 }, { "epoch": 0.3935202151097846, "grad_norm": 1.5474210977554321, "learning_rate": 6.917049788632815e-06, "loss": 0.7989, "step": 11141 }, { "epoch": 0.3935555369134925, "grad_norm": 1.7837547063827515, "learning_rate": 6.916521480200262e-06, "loss": 0.8417, "step": 11142 }, { "epoch": 0.3935908587172004, "grad_norm": 1.7090338468551636, "learning_rate": 6.915993146684636e-06, "loss": 0.8123, "step": 11143 }, { "epoch": 0.3936261805209083, "grad_norm": 1.5380849838256836, "learning_rate": 6.915464788092851e-06, "loss": 0.8273, "step": 11144 }, { "epoch": 0.3936615023246162, "grad_norm": 1.7082664966583252, "learning_rate": 6.914936404431823e-06, "loss": 0.8135, "step": 11145 }, { "epoch": 0.39369682412832413, "grad_norm": 1.6758110523223877, "learning_rate": 6.914407995708469e-06, "loss": 0.8066, "step": 11146 }, { "epoch": 0.39373214593203204, "grad_norm": 1.636217713356018, "learning_rate": 6.9138795619297015e-06, "loss": 0.8054, "step": 11147 }, { "epoch": 0.39376746773573995, "grad_norm": 1.6539983749389648, "learning_rate": 6.913351103102438e-06, "loss": 0.8122, "step": 11148 }, { "epoch": 0.3938027895394478, "grad_norm": 1.6168558597564697, "learning_rate": 6.9128226192335966e-06, "loss": 0.8207, "step": 11149 }, { "epoch": 0.3938381113431557, "grad_norm": 1.4784443378448486, "learning_rate": 6.91229411033009e-06, "loss": 0.7797, "step": 11150 }, { "epoch": 0.3938734331468636, "grad_norm": 1.7421983480453491, "learning_rate": 6.91176557639884e-06, "loss": 0.8151, "step": 11151 }, { "epoch": 0.39390875495057154, "grad_norm": 1.5992594957351685, "learning_rate": 6.911237017446759e-06, "loss": 0.8192, "step": 11152 }, { "epoch": 0.39394407675427945, "grad_norm": 1.758277416229248, "learning_rate": 6.910708433480769e-06, "loss": 0.8113, "step": 11153 }, { "epoch": 0.39397939855798736, "grad_norm": 1.621254801750183, "learning_rate": 6.910179824507785e-06, "loss": 0.8045, "step": 11154 }, { "epoch": 0.39401472036169527, "grad_norm": 1.6847013235092163, "learning_rate": 6.909651190534727e-06, "loss": 0.8124, "step": 11155 }, { "epoch": 0.3940500421654032, "grad_norm": 1.694900631904602, "learning_rate": 6.909122531568514e-06, "loss": 0.8077, "step": 11156 }, { "epoch": 0.3940853639691111, "grad_norm": 1.6231077909469604, "learning_rate": 6.908593847616063e-06, "loss": 0.7598, "step": 11157 }, { "epoch": 0.394120685772819, "grad_norm": 1.8133550882339478, "learning_rate": 6.908065138684295e-06, "loss": 0.7513, "step": 11158 }, { "epoch": 0.3941560075765269, "grad_norm": 1.5630075931549072, "learning_rate": 6.90753640478013e-06, "loss": 0.8191, "step": 11159 }, { "epoch": 0.3941913293802348, "grad_norm": 1.5563843250274658, "learning_rate": 6.9070076459104865e-06, "loss": 0.8109, "step": 11160 }, { "epoch": 0.39422665118394273, "grad_norm": 1.564450979232788, "learning_rate": 6.9064788620822855e-06, "loss": 0.8089, "step": 11161 }, { "epoch": 0.3942619729876506, "grad_norm": 1.5965077877044678, "learning_rate": 6.905950053302446e-06, "loss": 0.8177, "step": 11162 }, { "epoch": 0.3942972947913585, "grad_norm": 1.5515697002410889, "learning_rate": 6.9054212195778924e-06, "loss": 0.7895, "step": 11163 }, { "epoch": 0.3943326165950664, "grad_norm": 1.5809165239334106, "learning_rate": 6.904892360915543e-06, "loss": 0.829, "step": 11164 }, { "epoch": 0.3943679383987743, "grad_norm": 1.7150851488113403, "learning_rate": 6.90436347732232e-06, "loss": 0.7947, "step": 11165 }, { "epoch": 0.39440326020248223, "grad_norm": 1.7148388624191284, "learning_rate": 6.903834568805148e-06, "loss": 0.8401, "step": 11166 }, { "epoch": 0.39443858200619014, "grad_norm": 1.5937352180480957, "learning_rate": 6.903305635370945e-06, "loss": 0.8055, "step": 11167 }, { "epoch": 0.39447390380989805, "grad_norm": 1.735068917274475, "learning_rate": 6.902776677026636e-06, "loss": 0.8337, "step": 11168 }, { "epoch": 0.39450922561360596, "grad_norm": 1.8028761148452759, "learning_rate": 6.902247693779145e-06, "loss": 0.8214, "step": 11169 }, { "epoch": 0.3945445474173139, "grad_norm": 1.8467925786972046, "learning_rate": 6.901718685635393e-06, "loss": 0.8082, "step": 11170 }, { "epoch": 0.3945798692210218, "grad_norm": 1.7038453817367554, "learning_rate": 6.9011896526023025e-06, "loss": 0.8088, "step": 11171 }, { "epoch": 0.3946151910247297, "grad_norm": 1.7013603448867798, "learning_rate": 6.900660594686801e-06, "loss": 0.8048, "step": 11172 }, { "epoch": 0.3946505128284376, "grad_norm": 1.5315580368041992, "learning_rate": 6.90013151189581e-06, "loss": 0.8019, "step": 11173 }, { "epoch": 0.3946858346321455, "grad_norm": 1.5375620126724243, "learning_rate": 6.899602404236257e-06, "loss": 0.8072, "step": 11174 }, { "epoch": 0.39472115643585337, "grad_norm": 1.5792598724365234, "learning_rate": 6.8990732717150625e-06, "loss": 0.8246, "step": 11175 }, { "epoch": 0.3947564782395613, "grad_norm": 1.5856624841690063, "learning_rate": 6.898544114339154e-06, "loss": 0.76, "step": 11176 }, { "epoch": 0.3947918000432692, "grad_norm": 1.6856452226638794, "learning_rate": 6.898014932115458e-06, "loss": 0.8572, "step": 11177 }, { "epoch": 0.3948271218469771, "grad_norm": 1.803838849067688, "learning_rate": 6.8974857250509e-06, "loss": 0.8473, "step": 11178 }, { "epoch": 0.394862443650685, "grad_norm": 1.5863288640975952, "learning_rate": 6.896956493152405e-06, "loss": 0.8424, "step": 11179 }, { "epoch": 0.3948977654543929, "grad_norm": 1.7553927898406982, "learning_rate": 6.896427236426899e-06, "loss": 0.785, "step": 11180 }, { "epoch": 0.39493308725810083, "grad_norm": 1.7787063121795654, "learning_rate": 6.89589795488131e-06, "loss": 0.8127, "step": 11181 }, { "epoch": 0.39496840906180875, "grad_norm": 1.6117181777954102, "learning_rate": 6.895368648522567e-06, "loss": 0.8, "step": 11182 }, { "epoch": 0.39500373086551666, "grad_norm": 1.6624195575714111, "learning_rate": 6.894839317357593e-06, "loss": 0.7995, "step": 11183 }, { "epoch": 0.39503905266922457, "grad_norm": 1.7842541933059692, "learning_rate": 6.894309961393319e-06, "loss": 0.8076, "step": 11184 }, { "epoch": 0.3950743744729325, "grad_norm": 1.9820568561553955, "learning_rate": 6.893780580636673e-06, "loss": 0.7985, "step": 11185 }, { "epoch": 0.3951096962766404, "grad_norm": 1.706525206565857, "learning_rate": 6.893251175094581e-06, "loss": 0.849, "step": 11186 }, { "epoch": 0.3951450180803483, "grad_norm": 1.6330626010894775, "learning_rate": 6.892721744773975e-06, "loss": 0.7885, "step": 11187 }, { "epoch": 0.39518033988405615, "grad_norm": 1.586869239807129, "learning_rate": 6.892192289681784e-06, "loss": 0.8196, "step": 11188 }, { "epoch": 0.39521566168776406, "grad_norm": 1.6926344633102417, "learning_rate": 6.891662809824933e-06, "loss": 0.8435, "step": 11189 }, { "epoch": 0.395250983491472, "grad_norm": 1.7158828973770142, "learning_rate": 6.891133305210357e-06, "loss": 0.8119, "step": 11190 }, { "epoch": 0.3952863052951799, "grad_norm": 1.6530481576919556, "learning_rate": 6.890603775844982e-06, "loss": 0.7991, "step": 11191 }, { "epoch": 0.3953216270988878, "grad_norm": 1.5619105100631714, "learning_rate": 6.890074221735741e-06, "loss": 0.782, "step": 11192 }, { "epoch": 0.3953569489025957, "grad_norm": 1.6931912899017334, "learning_rate": 6.889544642889564e-06, "loss": 0.8201, "step": 11193 }, { "epoch": 0.3953922707063036, "grad_norm": 1.7406435012817383, "learning_rate": 6.8890150393133815e-06, "loss": 0.7787, "step": 11194 }, { "epoch": 0.3954275925100115, "grad_norm": 1.6327288150787354, "learning_rate": 6.888485411014125e-06, "loss": 0.8504, "step": 11195 }, { "epoch": 0.39546291431371944, "grad_norm": 1.765598177909851, "learning_rate": 6.8879557579987275e-06, "loss": 0.8144, "step": 11196 }, { "epoch": 0.39549823611742735, "grad_norm": 1.6509090662002563, "learning_rate": 6.88742608027412e-06, "loss": 0.8294, "step": 11197 }, { "epoch": 0.39553355792113526, "grad_norm": 1.641867995262146, "learning_rate": 6.8868963778472345e-06, "loss": 0.8356, "step": 11198 }, { "epoch": 0.39556887972484317, "grad_norm": 1.7721608877182007, "learning_rate": 6.886366650725002e-06, "loss": 0.8388, "step": 11199 }, { "epoch": 0.3956042015285511, "grad_norm": 1.581236481666565, "learning_rate": 6.88583689891436e-06, "loss": 0.8078, "step": 11200 }, { "epoch": 0.39563952333225894, "grad_norm": 2.117887020111084, "learning_rate": 6.885307122422239e-06, "loss": 0.8472, "step": 11201 }, { "epoch": 0.39567484513596685, "grad_norm": 1.7867324352264404, "learning_rate": 6.884777321255571e-06, "loss": 0.8209, "step": 11202 }, { "epoch": 0.39571016693967476, "grad_norm": 1.5491863489151, "learning_rate": 6.884247495421292e-06, "loss": 0.8391, "step": 11203 }, { "epoch": 0.39574548874338267, "grad_norm": 1.5686339139938354, "learning_rate": 6.883717644926338e-06, "loss": 0.7967, "step": 11204 }, { "epoch": 0.3957808105470906, "grad_norm": 1.7647931575775146, "learning_rate": 6.883187769777639e-06, "loss": 0.8323, "step": 11205 }, { "epoch": 0.3958161323507985, "grad_norm": 1.668737530708313, "learning_rate": 6.882657869982134e-06, "loss": 0.7815, "step": 11206 }, { "epoch": 0.3958514541545064, "grad_norm": 1.0597708225250244, "learning_rate": 6.882127945546757e-06, "loss": 0.5894, "step": 11207 }, { "epoch": 0.3958867759582143, "grad_norm": 1.6316862106323242, "learning_rate": 6.881597996478442e-06, "loss": 0.8362, "step": 11208 }, { "epoch": 0.3959220977619222, "grad_norm": 1.4461174011230469, "learning_rate": 6.8810680227841275e-06, "loss": 0.7942, "step": 11209 }, { "epoch": 0.39595741956563013, "grad_norm": 1.7226611375808716, "learning_rate": 6.8805380244707465e-06, "loss": 0.8299, "step": 11210 }, { "epoch": 0.39599274136933804, "grad_norm": 1.8513638973236084, "learning_rate": 6.880008001545238e-06, "loss": 0.7942, "step": 11211 }, { "epoch": 0.39602806317304595, "grad_norm": 1.6257829666137695, "learning_rate": 6.8794779540145396e-06, "loss": 0.7984, "step": 11212 }, { "epoch": 0.39606338497675386, "grad_norm": 4.219758987426758, "learning_rate": 6.878947881885586e-06, "loss": 0.7879, "step": 11213 }, { "epoch": 0.3960987067804617, "grad_norm": 1.6082431077957153, "learning_rate": 6.878417785165316e-06, "loss": 0.8052, "step": 11214 }, { "epoch": 0.39613402858416963, "grad_norm": 1.5871639251708984, "learning_rate": 6.877887663860667e-06, "loss": 0.8252, "step": 11215 }, { "epoch": 0.39616935038787754, "grad_norm": 1.6011966466903687, "learning_rate": 6.877357517978577e-06, "loss": 0.8162, "step": 11216 }, { "epoch": 0.39620467219158545, "grad_norm": 1.7022807598114014, "learning_rate": 6.876827347525985e-06, "loss": 0.8324, "step": 11217 }, { "epoch": 0.39623999399529336, "grad_norm": 1.521878957748413, "learning_rate": 6.876297152509829e-06, "loss": 0.8138, "step": 11218 }, { "epoch": 0.39627531579900127, "grad_norm": 1.706190824508667, "learning_rate": 6.875766932937049e-06, "loss": 0.7929, "step": 11219 }, { "epoch": 0.3963106376027092, "grad_norm": 1.726447343826294, "learning_rate": 6.875236688814583e-06, "loss": 0.8086, "step": 11220 }, { "epoch": 0.3963459594064171, "grad_norm": 1.560226559638977, "learning_rate": 6.874706420149373e-06, "loss": 0.7709, "step": 11221 }, { "epoch": 0.396381281210125, "grad_norm": 1.765886902809143, "learning_rate": 6.874176126948358e-06, "loss": 0.8256, "step": 11222 }, { "epoch": 0.3964166030138329, "grad_norm": 1.6032915115356445, "learning_rate": 6.873645809218476e-06, "loss": 0.797, "step": 11223 }, { "epoch": 0.3964519248175408, "grad_norm": 1.5285695791244507, "learning_rate": 6.873115466966672e-06, "loss": 0.7878, "step": 11224 }, { "epoch": 0.39648724662124873, "grad_norm": 2.316654920578003, "learning_rate": 6.872585100199884e-06, "loss": 0.8277, "step": 11225 }, { "epoch": 0.39652256842495665, "grad_norm": 1.0348173379898071, "learning_rate": 6.872054708925056e-06, "loss": 0.6159, "step": 11226 }, { "epoch": 0.3965578902286645, "grad_norm": 2.0046143531799316, "learning_rate": 6.871524293149125e-06, "loss": 0.8233, "step": 11227 }, { "epoch": 0.3965932120323724, "grad_norm": 1.6489453315734863, "learning_rate": 6.870993852879037e-06, "loss": 0.8091, "step": 11228 }, { "epoch": 0.3966285338360803, "grad_norm": 1.574827790260315, "learning_rate": 6.870463388121734e-06, "loss": 0.8153, "step": 11229 }, { "epoch": 0.39666385563978823, "grad_norm": 1.6334757804870605, "learning_rate": 6.869932898884157e-06, "loss": 0.7882, "step": 11230 }, { "epoch": 0.39669917744349614, "grad_norm": 1.7845909595489502, "learning_rate": 6.869402385173249e-06, "loss": 0.7914, "step": 11231 }, { "epoch": 0.39673449924720405, "grad_norm": 2.051891326904297, "learning_rate": 6.868871846995956e-06, "loss": 0.8047, "step": 11232 }, { "epoch": 0.39676982105091196, "grad_norm": 1.6649647951126099, "learning_rate": 6.868341284359219e-06, "loss": 0.8477, "step": 11233 }, { "epoch": 0.3968051428546199, "grad_norm": 1.782724380493164, "learning_rate": 6.867810697269982e-06, "loss": 0.793, "step": 11234 }, { "epoch": 0.3968404646583278, "grad_norm": 1.92826509475708, "learning_rate": 6.86728008573519e-06, "loss": 0.8014, "step": 11235 }, { "epoch": 0.3968757864620357, "grad_norm": 1.6713002920150757, "learning_rate": 6.866749449761786e-06, "loss": 0.8206, "step": 11236 }, { "epoch": 0.3969111082657436, "grad_norm": 1.8104114532470703, "learning_rate": 6.866218789356718e-06, "loss": 0.8428, "step": 11237 }, { "epoch": 0.3969464300694515, "grad_norm": 1.6440430879592896, "learning_rate": 6.865688104526928e-06, "loss": 0.8633, "step": 11238 }, { "epoch": 0.3969817518731594, "grad_norm": 1.6142125129699707, "learning_rate": 6.865157395279363e-06, "loss": 0.8013, "step": 11239 }, { "epoch": 0.3970170736768673, "grad_norm": 1.5897605419158936, "learning_rate": 6.864626661620968e-06, "loss": 0.8185, "step": 11240 }, { "epoch": 0.3970523954805752, "grad_norm": 1.5856454372406006, "learning_rate": 6.86409590355869e-06, "loss": 0.8211, "step": 11241 }, { "epoch": 0.3970877172842831, "grad_norm": 1.819735050201416, "learning_rate": 6.863565121099476e-06, "loss": 0.8328, "step": 11242 }, { "epoch": 0.397123039087991, "grad_norm": 1.619998574256897, "learning_rate": 6.863034314250273e-06, "loss": 0.7958, "step": 11243 }, { "epoch": 0.3971583608916989, "grad_norm": 1.548844575881958, "learning_rate": 6.862503483018025e-06, "loss": 0.8275, "step": 11244 }, { "epoch": 0.39719368269540684, "grad_norm": 1.7474006414413452, "learning_rate": 6.861972627409683e-06, "loss": 0.8338, "step": 11245 }, { "epoch": 0.39722900449911475, "grad_norm": 1.6408812999725342, "learning_rate": 6.861441747432193e-06, "loss": 0.7841, "step": 11246 }, { "epoch": 0.39726432630282266, "grad_norm": 1.6848169565200806, "learning_rate": 6.860910843092502e-06, "loss": 0.7956, "step": 11247 }, { "epoch": 0.39729964810653057, "grad_norm": 1.6317479610443115, "learning_rate": 6.860379914397561e-06, "loss": 0.8157, "step": 11248 }, { "epoch": 0.3973349699102385, "grad_norm": 1.668721079826355, "learning_rate": 6.859848961354316e-06, "loss": 0.799, "step": 11249 }, { "epoch": 0.3973702917139464, "grad_norm": 1.7959176301956177, "learning_rate": 6.859317983969719e-06, "loss": 0.8364, "step": 11250 }, { "epoch": 0.3974056135176543, "grad_norm": 1.6757280826568604, "learning_rate": 6.8587869822507165e-06, "loss": 0.7965, "step": 11251 }, { "epoch": 0.3974409353213622, "grad_norm": 1.8048447370529175, "learning_rate": 6.85825595620426e-06, "loss": 0.8391, "step": 11252 }, { "epoch": 0.39747625712507006, "grad_norm": 1.760812520980835, "learning_rate": 6.857724905837299e-06, "loss": 0.8271, "step": 11253 }, { "epoch": 0.397511578928778, "grad_norm": 1.6384538412094116, "learning_rate": 6.8571938311567845e-06, "loss": 0.808, "step": 11254 }, { "epoch": 0.3975469007324859, "grad_norm": 1.6063381433486938, "learning_rate": 6.856662732169666e-06, "loss": 0.8046, "step": 11255 }, { "epoch": 0.3975822225361938, "grad_norm": 2.104722261428833, "learning_rate": 6.8561316088828935e-06, "loss": 0.8001, "step": 11256 }, { "epoch": 0.3976175443399017, "grad_norm": 1.6160005331039429, "learning_rate": 6.855600461303419e-06, "loss": 0.8041, "step": 11257 }, { "epoch": 0.3976528661436096, "grad_norm": 1.7519181966781616, "learning_rate": 6.855069289438196e-06, "loss": 0.8069, "step": 11258 }, { "epoch": 0.39768818794731753, "grad_norm": 1.7188752889633179, "learning_rate": 6.854538093294174e-06, "loss": 0.8219, "step": 11259 }, { "epoch": 0.39772350975102544, "grad_norm": 1.966689109802246, "learning_rate": 6.854006872878306e-06, "loss": 0.8107, "step": 11260 }, { "epoch": 0.39775883155473335, "grad_norm": 1.7143155336380005, "learning_rate": 6.853475628197546e-06, "loss": 0.8024, "step": 11261 }, { "epoch": 0.39779415335844126, "grad_norm": 1.6664930582046509, "learning_rate": 6.852944359258844e-06, "loss": 0.8148, "step": 11262 }, { "epoch": 0.39782947516214917, "grad_norm": 1.658282995223999, "learning_rate": 6.852413066069155e-06, "loss": 0.7743, "step": 11263 }, { "epoch": 0.3978647969658571, "grad_norm": 1.8064628839492798, "learning_rate": 6.851881748635431e-06, "loss": 0.8071, "step": 11264 }, { "epoch": 0.397900118769565, "grad_norm": 1.636301875114441, "learning_rate": 6.8513504069646274e-06, "loss": 0.8182, "step": 11265 }, { "epoch": 0.39793544057327285, "grad_norm": 1.620644211769104, "learning_rate": 6.850819041063699e-06, "loss": 0.828, "step": 11266 }, { "epoch": 0.39797076237698076, "grad_norm": 1.0918595790863037, "learning_rate": 6.850287650939598e-06, "loss": 0.6087, "step": 11267 }, { "epoch": 0.39800608418068867, "grad_norm": 1.6322346925735474, "learning_rate": 6.8497562365992785e-06, "loss": 0.7788, "step": 11268 }, { "epoch": 0.3980414059843966, "grad_norm": 1.693638563156128, "learning_rate": 6.849224798049699e-06, "loss": 0.804, "step": 11269 }, { "epoch": 0.3980767277881045, "grad_norm": 1.6834532022476196, "learning_rate": 6.848693335297813e-06, "loss": 0.8173, "step": 11270 }, { "epoch": 0.3981120495918124, "grad_norm": 1.5353014469146729, "learning_rate": 6.8481618483505755e-06, "loss": 0.7905, "step": 11271 }, { "epoch": 0.3981473713955203, "grad_norm": 1.7848225831985474, "learning_rate": 6.847630337214944e-06, "loss": 0.8186, "step": 11272 }, { "epoch": 0.3981826931992282, "grad_norm": 1.7946851253509521, "learning_rate": 6.847098801897871e-06, "loss": 0.8083, "step": 11273 }, { "epoch": 0.39821801500293613, "grad_norm": 1.5034334659576416, "learning_rate": 6.846567242406319e-06, "loss": 0.7833, "step": 11274 }, { "epoch": 0.39825333680664404, "grad_norm": 1.6998878717422485, "learning_rate": 6.846035658747242e-06, "loss": 0.8021, "step": 11275 }, { "epoch": 0.39828865861035195, "grad_norm": 1.581216812133789, "learning_rate": 6.845504050927596e-06, "loss": 0.8142, "step": 11276 }, { "epoch": 0.39832398041405986, "grad_norm": 1.6222808361053467, "learning_rate": 6.844972418954341e-06, "loss": 0.7688, "step": 11277 }, { "epoch": 0.3983593022177678, "grad_norm": 1.7031902074813843, "learning_rate": 6.844440762834432e-06, "loss": 0.7605, "step": 11278 }, { "epoch": 0.39839462402147563, "grad_norm": 2.055715799331665, "learning_rate": 6.843909082574831e-06, "loss": 0.8118, "step": 11279 }, { "epoch": 0.39842994582518354, "grad_norm": 1.6568477153778076, "learning_rate": 6.843377378182494e-06, "loss": 0.8174, "step": 11280 }, { "epoch": 0.39846526762889145, "grad_norm": 1.600059986114502, "learning_rate": 6.84284564966438e-06, "loss": 0.7801, "step": 11281 }, { "epoch": 0.39850058943259936, "grad_norm": 1.65780770778656, "learning_rate": 6.842313897027449e-06, "loss": 0.8033, "step": 11282 }, { "epoch": 0.39853591123630727, "grad_norm": 1.6052427291870117, "learning_rate": 6.841782120278659e-06, "loss": 0.771, "step": 11283 }, { "epoch": 0.3985712330400152, "grad_norm": 2.828338623046875, "learning_rate": 6.8412503194249724e-06, "loss": 0.8327, "step": 11284 }, { "epoch": 0.3986065548437231, "grad_norm": 1.800832986831665, "learning_rate": 6.840718494473347e-06, "loss": 0.7941, "step": 11285 }, { "epoch": 0.398641876647431, "grad_norm": 1.67923104763031, "learning_rate": 6.840186645430743e-06, "loss": 0.8329, "step": 11286 }, { "epoch": 0.3986771984511389, "grad_norm": 1.994516372680664, "learning_rate": 6.839654772304124e-06, "loss": 0.7816, "step": 11287 }, { "epoch": 0.3987125202548468, "grad_norm": 1.7028573751449585, "learning_rate": 6.839122875100448e-06, "loss": 0.8167, "step": 11288 }, { "epoch": 0.39874784205855474, "grad_norm": 1.547572135925293, "learning_rate": 6.8385909538266785e-06, "loss": 0.7898, "step": 11289 }, { "epoch": 0.39878316386226265, "grad_norm": 1.4640992879867554, "learning_rate": 6.838059008489776e-06, "loss": 0.799, "step": 11290 }, { "epoch": 0.39881848566597056, "grad_norm": 6.798564910888672, "learning_rate": 6.837527039096701e-06, "loss": 0.835, "step": 11291 }, { "epoch": 0.3988538074696784, "grad_norm": 1.7277940511703491, "learning_rate": 6.836995045654419e-06, "loss": 0.8467, "step": 11292 }, { "epoch": 0.3988891292733863, "grad_norm": 1.7553702592849731, "learning_rate": 6.836463028169891e-06, "loss": 0.8534, "step": 11293 }, { "epoch": 0.39892445107709423, "grad_norm": 1.6755245923995972, "learning_rate": 6.835930986650079e-06, "loss": 0.8205, "step": 11294 }, { "epoch": 0.39895977288080214, "grad_norm": 1.5993131399154663, "learning_rate": 6.835398921101949e-06, "loss": 0.8051, "step": 11295 }, { "epoch": 0.39899509468451005, "grad_norm": 1.5668789148330688, "learning_rate": 6.834866831532463e-06, "loss": 0.7904, "step": 11296 }, { "epoch": 0.39903041648821796, "grad_norm": 1.6011945009231567, "learning_rate": 6.834334717948583e-06, "loss": 0.8217, "step": 11297 }, { "epoch": 0.3990657382919259, "grad_norm": 1.7305402755737305, "learning_rate": 6.833802580357277e-06, "loss": 0.7979, "step": 11298 }, { "epoch": 0.3991010600956338, "grad_norm": 1.7635973691940308, "learning_rate": 6.833270418765506e-06, "loss": 0.8139, "step": 11299 }, { "epoch": 0.3991363818993417, "grad_norm": 1.6199246644973755, "learning_rate": 6.8327382331802375e-06, "loss": 0.8105, "step": 11300 }, { "epoch": 0.3991717037030496, "grad_norm": 1.6386812925338745, "learning_rate": 6.8322060236084355e-06, "loss": 0.8117, "step": 11301 }, { "epoch": 0.3992070255067575, "grad_norm": 1.634389877319336, "learning_rate": 6.831673790057064e-06, "loss": 0.8051, "step": 11302 }, { "epoch": 0.39924234731046543, "grad_norm": 1.6434053182601929, "learning_rate": 6.831141532533092e-06, "loss": 0.7972, "step": 11303 }, { "epoch": 0.39927766911417334, "grad_norm": 1.7194275856018066, "learning_rate": 6.8306092510434825e-06, "loss": 0.83, "step": 11304 }, { "epoch": 0.3993129909178812, "grad_norm": 1.7921019792556763, "learning_rate": 6.830076945595203e-06, "loss": 0.8291, "step": 11305 }, { "epoch": 0.3993483127215891, "grad_norm": 1.505858063697815, "learning_rate": 6.829544616195223e-06, "loss": 0.8057, "step": 11306 }, { "epoch": 0.399383634525297, "grad_norm": 1.4696557521820068, "learning_rate": 6.829012262850504e-06, "loss": 0.7806, "step": 11307 }, { "epoch": 0.3994189563290049, "grad_norm": 1.7495172023773193, "learning_rate": 6.828479885568019e-06, "loss": 0.7915, "step": 11308 }, { "epoch": 0.39945427813271284, "grad_norm": 1.5691845417022705, "learning_rate": 6.827947484354731e-06, "loss": 0.796, "step": 11309 }, { "epoch": 0.39948959993642075, "grad_norm": 1.462336778640747, "learning_rate": 6.82741505921761e-06, "loss": 0.8164, "step": 11310 }, { "epoch": 0.39952492174012866, "grad_norm": 1.5753599405288696, "learning_rate": 6.826882610163626e-06, "loss": 0.7852, "step": 11311 }, { "epoch": 0.39956024354383657, "grad_norm": 1.5509008169174194, "learning_rate": 6.826350137199743e-06, "loss": 0.8169, "step": 11312 }, { "epoch": 0.3995955653475445, "grad_norm": 2.5898351669311523, "learning_rate": 6.825817640332934e-06, "loss": 0.7702, "step": 11313 }, { "epoch": 0.3996308871512524, "grad_norm": 1.6270421743392944, "learning_rate": 6.825285119570169e-06, "loss": 0.8014, "step": 11314 }, { "epoch": 0.3996662089549603, "grad_norm": 1.4477832317352295, "learning_rate": 6.824752574918412e-06, "loss": 0.7908, "step": 11315 }, { "epoch": 0.3997015307586682, "grad_norm": 1.570502519607544, "learning_rate": 6.824220006384638e-06, "loss": 0.8118, "step": 11316 }, { "epoch": 0.3997368525623761, "grad_norm": 1.6712487936019897, "learning_rate": 6.823687413975817e-06, "loss": 0.784, "step": 11317 }, { "epoch": 0.399772174366084, "grad_norm": 1.6892086267471313, "learning_rate": 6.823154797698917e-06, "loss": 0.7948, "step": 11318 }, { "epoch": 0.3998074961697919, "grad_norm": 1.6844288110733032, "learning_rate": 6.822622157560909e-06, "loss": 0.8115, "step": 11319 }, { "epoch": 0.3998428179734998, "grad_norm": 1.8013418912887573, "learning_rate": 6.822089493568764e-06, "loss": 0.8146, "step": 11320 }, { "epoch": 0.3998781397772077, "grad_norm": 1.5802606344223022, "learning_rate": 6.821556805729455e-06, "loss": 0.7861, "step": 11321 }, { "epoch": 0.3999134615809156, "grad_norm": 1.6498991250991821, "learning_rate": 6.821024094049954e-06, "loss": 0.7769, "step": 11322 }, { "epoch": 0.39994878338462353, "grad_norm": 1.8204944133758545, "learning_rate": 6.82049135853723e-06, "loss": 0.8184, "step": 11323 }, { "epoch": 0.39998410518833144, "grad_norm": 1.7413352727890015, "learning_rate": 6.819958599198258e-06, "loss": 0.8062, "step": 11324 }, { "epoch": 0.40001942699203935, "grad_norm": 1.8677880764007568, "learning_rate": 6.819425816040012e-06, "loss": 0.8177, "step": 11325 }, { "epoch": 0.40005474879574726, "grad_norm": 1.5314449071884155, "learning_rate": 6.818893009069461e-06, "loss": 0.7846, "step": 11326 }, { "epoch": 0.40009007059945517, "grad_norm": 1.843509316444397, "learning_rate": 6.818360178293582e-06, "loss": 0.8257, "step": 11327 }, { "epoch": 0.4001253924031631, "grad_norm": 1.8449993133544922, "learning_rate": 6.817827323719344e-06, "loss": 0.8176, "step": 11328 }, { "epoch": 0.400160714206871, "grad_norm": 1.5437400341033936, "learning_rate": 6.817294445353725e-06, "loss": 0.7561, "step": 11329 }, { "epoch": 0.4001960360105789, "grad_norm": 2.055067300796509, "learning_rate": 6.816761543203698e-06, "loss": 0.831, "step": 11330 }, { "epoch": 0.40023135781428676, "grad_norm": 1.721010684967041, "learning_rate": 6.816228617276237e-06, "loss": 0.7919, "step": 11331 }, { "epoch": 0.40026667961799467, "grad_norm": 1.5620899200439453, "learning_rate": 6.815695667578318e-06, "loss": 0.7797, "step": 11332 }, { "epoch": 0.4003020014217026, "grad_norm": 1.5912959575653076, "learning_rate": 6.815162694116915e-06, "loss": 0.8095, "step": 11333 }, { "epoch": 0.4003373232254105, "grad_norm": 1.577921986579895, "learning_rate": 6.814629696899004e-06, "loss": 0.826, "step": 11334 }, { "epoch": 0.4003726450291184, "grad_norm": 1.7255586385726929, "learning_rate": 6.8140966759315605e-06, "loss": 0.7601, "step": 11335 }, { "epoch": 0.4004079668328263, "grad_norm": 1.6554594039916992, "learning_rate": 6.81356363122156e-06, "loss": 0.8273, "step": 11336 }, { "epoch": 0.4004432886365342, "grad_norm": 1.7486250400543213, "learning_rate": 6.813030562775982e-06, "loss": 0.8205, "step": 11337 }, { "epoch": 0.40047861044024213, "grad_norm": 1.8420133590698242, "learning_rate": 6.8124974706018e-06, "loss": 0.8285, "step": 11338 }, { "epoch": 0.40051393224395004, "grad_norm": 2.3479549884796143, "learning_rate": 6.811964354705989e-06, "loss": 0.8204, "step": 11339 }, { "epoch": 0.40054925404765795, "grad_norm": 1.7884855270385742, "learning_rate": 6.811431215095532e-06, "loss": 0.8051, "step": 11340 }, { "epoch": 0.40058457585136586, "grad_norm": 1.6496585607528687, "learning_rate": 6.810898051777402e-06, "loss": 0.7992, "step": 11341 }, { "epoch": 0.4006198976550738, "grad_norm": 1.6058712005615234, "learning_rate": 6.81036486475858e-06, "loss": 0.8213, "step": 11342 }, { "epoch": 0.4006552194587817, "grad_norm": 1.6675945520401, "learning_rate": 6.809831654046043e-06, "loss": 0.8004, "step": 11343 }, { "epoch": 0.40069054126248954, "grad_norm": 1.8228033781051636, "learning_rate": 6.809298419646769e-06, "loss": 0.7893, "step": 11344 }, { "epoch": 0.40072586306619745, "grad_norm": 1.67495858669281, "learning_rate": 6.808765161567738e-06, "loss": 0.8365, "step": 11345 }, { "epoch": 0.40076118486990536, "grad_norm": 1.6483211517333984, "learning_rate": 6.808231879815928e-06, "loss": 0.801, "step": 11346 }, { "epoch": 0.4007965066736133, "grad_norm": 1.8197882175445557, "learning_rate": 6.807698574398319e-06, "loss": 0.772, "step": 11347 }, { "epoch": 0.4008318284773212, "grad_norm": 1.788240909576416, "learning_rate": 6.807165245321892e-06, "loss": 0.8279, "step": 11348 }, { "epoch": 0.4008671502810291, "grad_norm": 1.0209885835647583, "learning_rate": 6.806631892593624e-06, "loss": 0.5802, "step": 11349 }, { "epoch": 0.400902472084737, "grad_norm": 1.4560596942901611, "learning_rate": 6.806098516220499e-06, "loss": 0.7987, "step": 11350 }, { "epoch": 0.4009377938884449, "grad_norm": 1.7040146589279175, "learning_rate": 6.805565116209496e-06, "loss": 0.8036, "step": 11351 }, { "epoch": 0.4009731156921528, "grad_norm": 1.7082138061523438, "learning_rate": 6.8050316925675945e-06, "loss": 0.8115, "step": 11352 }, { "epoch": 0.40100843749586074, "grad_norm": 1.657659888267517, "learning_rate": 6.8044982453017785e-06, "loss": 0.8296, "step": 11353 }, { "epoch": 0.40104375929956865, "grad_norm": 1.9348831176757812, "learning_rate": 6.80396477441903e-06, "loss": 0.8146, "step": 11354 }, { "epoch": 0.40107908110327656, "grad_norm": 1.7362000942230225, "learning_rate": 6.803431279926328e-06, "loss": 0.8051, "step": 11355 }, { "epoch": 0.40111440290698447, "grad_norm": 1.7895793914794922, "learning_rate": 6.802897761830658e-06, "loss": 0.8295, "step": 11356 }, { "epoch": 0.4011497247106924, "grad_norm": 1.5835840702056885, "learning_rate": 6.8023642201389995e-06, "loss": 0.8072, "step": 11357 }, { "epoch": 0.40118504651440023, "grad_norm": 1.3983412981033325, "learning_rate": 6.801830654858337e-06, "loss": 0.7873, "step": 11358 }, { "epoch": 0.40122036831810814, "grad_norm": 0.9663210511207581, "learning_rate": 6.8012970659956545e-06, "loss": 0.6097, "step": 11359 }, { "epoch": 0.40125569012181606, "grad_norm": 1.6098582744598389, "learning_rate": 6.800763453557933e-06, "loss": 0.7945, "step": 11360 }, { "epoch": 0.40129101192552397, "grad_norm": 1.8670662641525269, "learning_rate": 6.800229817552159e-06, "loss": 0.7973, "step": 11361 }, { "epoch": 0.4013263337292319, "grad_norm": 1.9848847389221191, "learning_rate": 6.799696157985316e-06, "loss": 0.7954, "step": 11362 }, { "epoch": 0.4013616555329398, "grad_norm": 1.5994775295257568, "learning_rate": 6.799162474864388e-06, "loss": 0.8409, "step": 11363 }, { "epoch": 0.4013969773366477, "grad_norm": 1.6333428621292114, "learning_rate": 6.798628768196361e-06, "loss": 0.8096, "step": 11364 }, { "epoch": 0.4014322991403556, "grad_norm": 1.8547910451889038, "learning_rate": 6.7980950379882146e-06, "loss": 0.8175, "step": 11365 }, { "epoch": 0.4014676209440635, "grad_norm": 1.008713722229004, "learning_rate": 6.797561284246942e-06, "loss": 0.5759, "step": 11366 }, { "epoch": 0.40150294274777143, "grad_norm": 1.6361230611801147, "learning_rate": 6.797027506979523e-06, "loss": 0.786, "step": 11367 }, { "epoch": 0.40153826455147934, "grad_norm": 0.9661943316459656, "learning_rate": 6.796493706192947e-06, "loss": 0.5993, "step": 11368 }, { "epoch": 0.40157358635518725, "grad_norm": 1.539985179901123, "learning_rate": 6.7959598818941986e-06, "loss": 0.8053, "step": 11369 }, { "epoch": 0.40160890815889516, "grad_norm": 1.5046985149383545, "learning_rate": 6.7954260340902645e-06, "loss": 0.8094, "step": 11370 }, { "epoch": 0.401644229962603, "grad_norm": 1.5643588304519653, "learning_rate": 6.794892162788134e-06, "loss": 0.8019, "step": 11371 }, { "epoch": 0.4016795517663109, "grad_norm": 1.648465633392334, "learning_rate": 6.794358267994792e-06, "loss": 0.8065, "step": 11372 }, { "epoch": 0.40171487357001884, "grad_norm": 2.2045230865478516, "learning_rate": 6.793824349717224e-06, "loss": 0.8709, "step": 11373 }, { "epoch": 0.40175019537372675, "grad_norm": 1.7381855249404907, "learning_rate": 6.793290407962422e-06, "loss": 0.788, "step": 11374 }, { "epoch": 0.40178551717743466, "grad_norm": 1.7942609786987305, "learning_rate": 6.792756442737371e-06, "loss": 0.7972, "step": 11375 }, { "epoch": 0.40182083898114257, "grad_norm": 1.6855978965759277, "learning_rate": 6.792222454049061e-06, "loss": 0.79, "step": 11376 }, { "epoch": 0.4018561607848505, "grad_norm": 1.7705365419387817, "learning_rate": 6.79168844190448e-06, "loss": 0.7979, "step": 11377 }, { "epoch": 0.4018914825885584, "grad_norm": 1.5648131370544434, "learning_rate": 6.791154406310618e-06, "loss": 0.789, "step": 11378 }, { "epoch": 0.4019268043922663, "grad_norm": 1.5714068412780762, "learning_rate": 6.790620347274465e-06, "loss": 0.7952, "step": 11379 }, { "epoch": 0.4019621261959742, "grad_norm": 1.710094690322876, "learning_rate": 6.790086264803009e-06, "loss": 0.7905, "step": 11380 }, { "epoch": 0.4019974479996821, "grad_norm": 1.853323221206665, "learning_rate": 6.789552158903238e-06, "loss": 0.8346, "step": 11381 }, { "epoch": 0.40203276980339003, "grad_norm": 1.6692436933517456, "learning_rate": 6.789018029582149e-06, "loss": 0.8047, "step": 11382 }, { "epoch": 0.40206809160709794, "grad_norm": 1.8244153261184692, "learning_rate": 6.788483876846725e-06, "loss": 0.7974, "step": 11383 }, { "epoch": 0.4021034134108058, "grad_norm": 1.784043550491333, "learning_rate": 6.787949700703962e-06, "loss": 0.8409, "step": 11384 }, { "epoch": 0.4021387352145137, "grad_norm": 1.438172698020935, "learning_rate": 6.787415501160848e-06, "loss": 0.7952, "step": 11385 }, { "epoch": 0.4021740570182216, "grad_norm": 1.9048429727554321, "learning_rate": 6.786881278224375e-06, "loss": 0.8132, "step": 11386 }, { "epoch": 0.40220937882192953, "grad_norm": 1.796879768371582, "learning_rate": 6.786347031901539e-06, "loss": 0.8153, "step": 11387 }, { "epoch": 0.40224470062563744, "grad_norm": 1.7237951755523682, "learning_rate": 6.785812762199326e-06, "loss": 0.8199, "step": 11388 }, { "epoch": 0.40228002242934535, "grad_norm": 1.5861141681671143, "learning_rate": 6.785278469124732e-06, "loss": 0.8135, "step": 11389 }, { "epoch": 0.40231534423305326, "grad_norm": 1.7162977457046509, "learning_rate": 6.7847441526847504e-06, "loss": 0.8042, "step": 11390 }, { "epoch": 0.4023506660367612, "grad_norm": 1.5826835632324219, "learning_rate": 6.784209812886372e-06, "loss": 0.7713, "step": 11391 }, { "epoch": 0.4023859878404691, "grad_norm": 1.7073472738265991, "learning_rate": 6.783675449736591e-06, "loss": 0.8473, "step": 11392 }, { "epoch": 0.402421309644177, "grad_norm": 2.1446220874786377, "learning_rate": 6.783141063242401e-06, "loss": 0.8406, "step": 11393 }, { "epoch": 0.4024566314478849, "grad_norm": 1.5667461156845093, "learning_rate": 6.7826066534107944e-06, "loss": 0.7977, "step": 11394 }, { "epoch": 0.4024919532515928, "grad_norm": 1.7094792127609253, "learning_rate": 6.78207222024877e-06, "loss": 0.8086, "step": 11395 }, { "epoch": 0.4025272750553007, "grad_norm": 1.6983692646026611, "learning_rate": 6.781537763763317e-06, "loss": 0.8129, "step": 11396 }, { "epoch": 0.4025625968590086, "grad_norm": 1.843187928199768, "learning_rate": 6.7810032839614325e-06, "loss": 0.846, "step": 11397 }, { "epoch": 0.4025979186627165, "grad_norm": 1.7954175472259521, "learning_rate": 6.780468780850113e-06, "loss": 0.8162, "step": 11398 }, { "epoch": 0.4026332404664244, "grad_norm": 1.5203496217727661, "learning_rate": 6.7799342544363525e-06, "loss": 0.7599, "step": 11399 }, { "epoch": 0.4026685622701323, "grad_norm": 1.6957341432571411, "learning_rate": 6.779399704727148e-06, "loss": 0.8184, "step": 11400 }, { "epoch": 0.4027038840738402, "grad_norm": 1.5852943658828735, "learning_rate": 6.778865131729494e-06, "loss": 0.8175, "step": 11401 }, { "epoch": 0.40273920587754813, "grad_norm": 1.7133338451385498, "learning_rate": 6.7783305354503855e-06, "loss": 0.8035, "step": 11402 }, { "epoch": 0.40277452768125604, "grad_norm": 1.5831507444381714, "learning_rate": 6.777795915896824e-06, "loss": 0.8118, "step": 11403 }, { "epoch": 0.40280984948496396, "grad_norm": 1.6566331386566162, "learning_rate": 6.777261273075802e-06, "loss": 0.7984, "step": 11404 }, { "epoch": 0.40284517128867187, "grad_norm": 1.672201156616211, "learning_rate": 6.776726606994319e-06, "loss": 0.7875, "step": 11405 }, { "epoch": 0.4028804930923798, "grad_norm": 1.5599898099899292, "learning_rate": 6.7761919176593735e-06, "loss": 0.7979, "step": 11406 }, { "epoch": 0.4029158148960877, "grad_norm": 1.5857452154159546, "learning_rate": 6.77565720507796e-06, "loss": 0.7845, "step": 11407 }, { "epoch": 0.4029511366997956, "grad_norm": 1.6137340068817139, "learning_rate": 6.77512246925708e-06, "loss": 0.8206, "step": 11408 }, { "epoch": 0.4029864585035035, "grad_norm": 2.1120200157165527, "learning_rate": 6.774587710203731e-06, "loss": 0.802, "step": 11409 }, { "epoch": 0.40302178030721136, "grad_norm": 1.778936743736267, "learning_rate": 6.774052927924911e-06, "loss": 0.7742, "step": 11410 }, { "epoch": 0.4030571021109193, "grad_norm": 1.784784197807312, "learning_rate": 6.77351812242762e-06, "loss": 0.7907, "step": 11411 }, { "epoch": 0.4030924239146272, "grad_norm": 1.7718178033828735, "learning_rate": 6.772983293718858e-06, "loss": 0.8567, "step": 11412 }, { "epoch": 0.4031277457183351, "grad_norm": 1.7067853212356567, "learning_rate": 6.772448441805623e-06, "loss": 0.8368, "step": 11413 }, { "epoch": 0.403163067522043, "grad_norm": 1.5081489086151123, "learning_rate": 6.771913566694917e-06, "loss": 0.7672, "step": 11414 }, { "epoch": 0.4031983893257509, "grad_norm": 1.866106390953064, "learning_rate": 6.771378668393737e-06, "loss": 0.8431, "step": 11415 }, { "epoch": 0.4032337111294588, "grad_norm": 1.6826246976852417, "learning_rate": 6.770843746909087e-06, "loss": 0.7913, "step": 11416 }, { "epoch": 0.40326903293316674, "grad_norm": 1.7036854028701782, "learning_rate": 6.770308802247968e-06, "loss": 0.8439, "step": 11417 }, { "epoch": 0.40330435473687465, "grad_norm": 1.89546799659729, "learning_rate": 6.769773834417379e-06, "loss": 0.8343, "step": 11418 }, { "epoch": 0.40333967654058256, "grad_norm": 1.611276388168335, "learning_rate": 6.769238843424324e-06, "loss": 0.8374, "step": 11419 }, { "epoch": 0.40337499834429047, "grad_norm": 1.9574275016784668, "learning_rate": 6.768703829275802e-06, "loss": 0.8295, "step": 11420 }, { "epoch": 0.4034103201479984, "grad_norm": 1.813876748085022, "learning_rate": 6.7681687919788175e-06, "loss": 0.8201, "step": 11421 }, { "epoch": 0.4034456419517063, "grad_norm": 1.6447477340698242, "learning_rate": 6.767633731540373e-06, "loss": 0.8579, "step": 11422 }, { "epoch": 0.40348096375541415, "grad_norm": 1.6586713790893555, "learning_rate": 6.767098647967469e-06, "loss": 0.8328, "step": 11423 }, { "epoch": 0.40351628555912206, "grad_norm": 1.7743397951126099, "learning_rate": 6.766563541267111e-06, "loss": 0.7993, "step": 11424 }, { "epoch": 0.40355160736282997, "grad_norm": 1.6937119960784912, "learning_rate": 6.766028411446302e-06, "loss": 0.8474, "step": 11425 }, { "epoch": 0.4035869291665379, "grad_norm": 1.6485248804092407, "learning_rate": 6.765493258512044e-06, "loss": 0.7591, "step": 11426 }, { "epoch": 0.4036222509702458, "grad_norm": 1.6076231002807617, "learning_rate": 6.764958082471342e-06, "loss": 0.8137, "step": 11427 }, { "epoch": 0.4036575727739537, "grad_norm": 1.616331934928894, "learning_rate": 6.764422883331202e-06, "loss": 0.7794, "step": 11428 }, { "epoch": 0.4036928945776616, "grad_norm": 1.6370673179626465, "learning_rate": 6.763887661098626e-06, "loss": 0.7977, "step": 11429 }, { "epoch": 0.4037282163813695, "grad_norm": 1.8362950086593628, "learning_rate": 6.76335241578062e-06, "loss": 0.817, "step": 11430 }, { "epoch": 0.40376353818507743, "grad_norm": 1.5039745569229126, "learning_rate": 6.762817147384188e-06, "loss": 0.8221, "step": 11431 }, { "epoch": 0.40379885998878534, "grad_norm": 1.6849021911621094, "learning_rate": 6.762281855916338e-06, "loss": 0.8135, "step": 11432 }, { "epoch": 0.40383418179249325, "grad_norm": 1.6028708219528198, "learning_rate": 6.761746541384074e-06, "loss": 0.8284, "step": 11433 }, { "epoch": 0.40386950359620116, "grad_norm": 1.6290035247802734, "learning_rate": 6.761211203794401e-06, "loss": 0.8125, "step": 11434 }, { "epoch": 0.4039048253999091, "grad_norm": 1.6311664581298828, "learning_rate": 6.760675843154329e-06, "loss": 0.8388, "step": 11435 }, { "epoch": 0.40394014720361693, "grad_norm": 1.5360944271087646, "learning_rate": 6.760140459470861e-06, "loss": 0.8075, "step": 11436 }, { "epoch": 0.40397546900732484, "grad_norm": 1.7680182456970215, "learning_rate": 6.759605052751007e-06, "loss": 0.7843, "step": 11437 }, { "epoch": 0.40401079081103275, "grad_norm": 1.8067735433578491, "learning_rate": 6.759069623001772e-06, "loss": 0.802, "step": 11438 }, { "epoch": 0.40404611261474066, "grad_norm": 1.012959361076355, "learning_rate": 6.758534170230164e-06, "loss": 0.5782, "step": 11439 }, { "epoch": 0.40408143441844857, "grad_norm": 1.654321312904358, "learning_rate": 6.757998694443192e-06, "loss": 0.833, "step": 11440 }, { "epoch": 0.4041167562221565, "grad_norm": 1.6834778785705566, "learning_rate": 6.757463195647862e-06, "loss": 0.7857, "step": 11441 }, { "epoch": 0.4041520780258644, "grad_norm": 1.7183969020843506, "learning_rate": 6.7569276738511855e-06, "loss": 0.8258, "step": 11442 }, { "epoch": 0.4041873998295723, "grad_norm": 1.6894843578338623, "learning_rate": 6.756392129060171e-06, "loss": 0.8179, "step": 11443 }, { "epoch": 0.4042227216332802, "grad_norm": 1.688482403755188, "learning_rate": 6.755856561281824e-06, "loss": 0.7925, "step": 11444 }, { "epoch": 0.4042580434369881, "grad_norm": 1.62144935131073, "learning_rate": 6.755320970523158e-06, "loss": 0.8382, "step": 11445 }, { "epoch": 0.40429336524069603, "grad_norm": 1.5812712907791138, "learning_rate": 6.75478535679118e-06, "loss": 0.8122, "step": 11446 }, { "epoch": 0.40432868704440394, "grad_norm": 1.5901784896850586, "learning_rate": 6.754249720092902e-06, "loss": 0.8081, "step": 11447 }, { "epoch": 0.40436400884811186, "grad_norm": 1.6450825929641724, "learning_rate": 6.753714060435334e-06, "loss": 0.7945, "step": 11448 }, { "epoch": 0.4043993306518197, "grad_norm": 1.7648273706436157, "learning_rate": 6.753178377825484e-06, "loss": 0.804, "step": 11449 }, { "epoch": 0.4044346524555276, "grad_norm": 1.6234761476516724, "learning_rate": 6.752642672270367e-06, "loss": 0.8214, "step": 11450 }, { "epoch": 0.40446997425923553, "grad_norm": 1.616031527519226, "learning_rate": 6.752106943776992e-06, "loss": 0.8186, "step": 11451 }, { "epoch": 0.40450529606294344, "grad_norm": 1.6406033039093018, "learning_rate": 6.7515711923523695e-06, "loss": 0.8064, "step": 11452 }, { "epoch": 0.40454061786665135, "grad_norm": 1.7729413509368896, "learning_rate": 6.751035418003513e-06, "loss": 0.7823, "step": 11453 }, { "epoch": 0.40457593967035926, "grad_norm": 1.4894511699676514, "learning_rate": 6.750499620737435e-06, "loss": 0.8182, "step": 11454 }, { "epoch": 0.4046112614740672, "grad_norm": 1.5732097625732422, "learning_rate": 6.749963800561146e-06, "loss": 0.7973, "step": 11455 }, { "epoch": 0.4046465832777751, "grad_norm": 1.6079005002975464, "learning_rate": 6.74942795748166e-06, "loss": 0.809, "step": 11456 }, { "epoch": 0.404681905081483, "grad_norm": 1.689591884613037, "learning_rate": 6.74889209150599e-06, "loss": 0.792, "step": 11457 }, { "epoch": 0.4047172268851909, "grad_norm": 1.5759975910186768, "learning_rate": 6.748356202641149e-06, "loss": 0.8455, "step": 11458 }, { "epoch": 0.4047525486888988, "grad_norm": 1.5626237392425537, "learning_rate": 6.7478202908941506e-06, "loss": 0.8321, "step": 11459 }, { "epoch": 0.4047878704926067, "grad_norm": 1.664214849472046, "learning_rate": 6.747284356272008e-06, "loss": 0.7935, "step": 11460 }, { "epoch": 0.40482319229631464, "grad_norm": 1.7184524536132812, "learning_rate": 6.7467483987817374e-06, "loss": 0.7844, "step": 11461 }, { "epoch": 0.4048585141000225, "grad_norm": 1.676551103591919, "learning_rate": 6.74621241843035e-06, "loss": 0.8546, "step": 11462 }, { "epoch": 0.4048938359037304, "grad_norm": 1.7440096139907837, "learning_rate": 6.7456764152248645e-06, "loss": 0.8525, "step": 11463 }, { "epoch": 0.4049291577074383, "grad_norm": 1.6671279668807983, "learning_rate": 6.745140389172294e-06, "loss": 0.809, "step": 11464 }, { "epoch": 0.4049644795111462, "grad_norm": 1.5295525789260864, "learning_rate": 6.7446043402796545e-06, "loss": 0.7779, "step": 11465 }, { "epoch": 0.40499980131485414, "grad_norm": 0.9831985235214233, "learning_rate": 6.744068268553961e-06, "loss": 0.589, "step": 11466 }, { "epoch": 0.40503512311856205, "grad_norm": 1.8279650211334229, "learning_rate": 6.743532174002229e-06, "loss": 0.8286, "step": 11467 }, { "epoch": 0.40507044492226996, "grad_norm": 1.541721224784851, "learning_rate": 6.742996056631476e-06, "loss": 0.8501, "step": 11468 }, { "epoch": 0.40510576672597787, "grad_norm": 1.8089147806167603, "learning_rate": 6.74245991644872e-06, "loss": 0.8407, "step": 11469 }, { "epoch": 0.4051410885296858, "grad_norm": 1.71999990940094, "learning_rate": 6.741923753460975e-06, "loss": 0.7896, "step": 11470 }, { "epoch": 0.4051764103333937, "grad_norm": 1.7407593727111816, "learning_rate": 6.74138756767526e-06, "loss": 0.7959, "step": 11471 }, { "epoch": 0.4052117321371016, "grad_norm": 1.6388565301895142, "learning_rate": 6.74085135909859e-06, "loss": 0.7936, "step": 11472 }, { "epoch": 0.4052470539408095, "grad_norm": 1.6244529485702515, "learning_rate": 6.740315127737985e-06, "loss": 0.8048, "step": 11473 }, { "epoch": 0.4052823757445174, "grad_norm": 1.67003333568573, "learning_rate": 6.739778873600464e-06, "loss": 0.7731, "step": 11474 }, { "epoch": 0.4053176975482253, "grad_norm": 1.599269151687622, "learning_rate": 6.739242596693044e-06, "loss": 0.8003, "step": 11475 }, { "epoch": 0.4053530193519332, "grad_norm": 1.5493676662445068, "learning_rate": 6.7387062970227435e-06, "loss": 0.8212, "step": 11476 }, { "epoch": 0.4053883411556411, "grad_norm": 1.6238447427749634, "learning_rate": 6.738169974596582e-06, "loss": 0.8166, "step": 11477 }, { "epoch": 0.405423662959349, "grad_norm": 1.6959534883499146, "learning_rate": 6.737633629421578e-06, "loss": 0.7829, "step": 11478 }, { "epoch": 0.4054589847630569, "grad_norm": 1.7917149066925049, "learning_rate": 6.737097261504752e-06, "loss": 0.7802, "step": 11479 }, { "epoch": 0.40549430656676483, "grad_norm": 1.5756648778915405, "learning_rate": 6.736560870853124e-06, "loss": 0.8095, "step": 11480 }, { "epoch": 0.40552962837047274, "grad_norm": 1.779163122177124, "learning_rate": 6.736024457473713e-06, "loss": 0.8388, "step": 11481 }, { "epoch": 0.40556495017418065, "grad_norm": 1.7015113830566406, "learning_rate": 6.7354880213735405e-06, "loss": 0.7883, "step": 11482 }, { "epoch": 0.40560027197788856, "grad_norm": 1.614579200744629, "learning_rate": 6.734951562559628e-06, "loss": 0.7961, "step": 11483 }, { "epoch": 0.40563559378159647, "grad_norm": 1.8674330711364746, "learning_rate": 6.734415081038995e-06, "loss": 0.8199, "step": 11484 }, { "epoch": 0.4056709155853044, "grad_norm": 1.8050591945648193, "learning_rate": 6.7338785768186635e-06, "loss": 0.8321, "step": 11485 }, { "epoch": 0.4057062373890123, "grad_norm": 1.602200984954834, "learning_rate": 6.733342049905654e-06, "loss": 0.8366, "step": 11486 }, { "epoch": 0.4057415591927202, "grad_norm": 1.523628830909729, "learning_rate": 6.732805500306991e-06, "loss": 0.8321, "step": 11487 }, { "epoch": 0.40577688099642806, "grad_norm": 1.6614919900894165, "learning_rate": 6.732268928029695e-06, "loss": 0.8316, "step": 11488 }, { "epoch": 0.40581220280013597, "grad_norm": 2.1206207275390625, "learning_rate": 6.731732333080788e-06, "loss": 0.8609, "step": 11489 }, { "epoch": 0.4058475246038439, "grad_norm": 1.8321913480758667, "learning_rate": 6.731195715467294e-06, "loss": 0.7977, "step": 11490 }, { "epoch": 0.4058828464075518, "grad_norm": 1.6736998558044434, "learning_rate": 6.730659075196236e-06, "loss": 0.8098, "step": 11491 }, { "epoch": 0.4059181682112597, "grad_norm": 1.5638197660446167, "learning_rate": 6.730122412274639e-06, "loss": 0.7767, "step": 11492 }, { "epoch": 0.4059534900149676, "grad_norm": 1.5062708854675293, "learning_rate": 6.729585726709525e-06, "loss": 0.8281, "step": 11493 }, { "epoch": 0.4059888118186755, "grad_norm": 1.6774667501449585, "learning_rate": 6.729049018507916e-06, "loss": 0.7988, "step": 11494 }, { "epoch": 0.40602413362238343, "grad_norm": 1.7563024759292603, "learning_rate": 6.72851228767684e-06, "loss": 0.806, "step": 11495 }, { "epoch": 0.40605945542609134, "grad_norm": 1.713843584060669, "learning_rate": 6.727975534223318e-06, "loss": 0.782, "step": 11496 }, { "epoch": 0.40609477722979925, "grad_norm": 1.7196016311645508, "learning_rate": 6.727438758154379e-06, "loss": 0.8148, "step": 11497 }, { "epoch": 0.40613009903350716, "grad_norm": 0.8898063898086548, "learning_rate": 6.7269019594770455e-06, "loss": 0.61, "step": 11498 }, { "epoch": 0.4061654208372151, "grad_norm": 1.7535505294799805, "learning_rate": 6.726365138198343e-06, "loss": 0.8036, "step": 11499 }, { "epoch": 0.406200742640923, "grad_norm": 1.6362839937210083, "learning_rate": 6.725828294325301e-06, "loss": 0.7786, "step": 11500 }, { "epoch": 0.40623606444463084, "grad_norm": 1.6823313236236572, "learning_rate": 6.72529142786494e-06, "loss": 0.7765, "step": 11501 }, { "epoch": 0.40627138624833875, "grad_norm": 1.8141404390335083, "learning_rate": 6.7247545388242906e-06, "loss": 0.8096, "step": 11502 }, { "epoch": 0.40630670805204666, "grad_norm": 1.7175086736679077, "learning_rate": 6.724217627210378e-06, "loss": 0.8358, "step": 11503 }, { "epoch": 0.40634202985575457, "grad_norm": 2.3480935096740723, "learning_rate": 6.723680693030228e-06, "loss": 0.7977, "step": 11504 }, { "epoch": 0.4063773516594625, "grad_norm": 1.6325583457946777, "learning_rate": 6.723143736290869e-06, "loss": 0.83, "step": 11505 }, { "epoch": 0.4064126734631704, "grad_norm": 1.8573877811431885, "learning_rate": 6.7226067569993305e-06, "loss": 0.8393, "step": 11506 }, { "epoch": 0.4064479952668783, "grad_norm": 1.6612117290496826, "learning_rate": 6.722069755162637e-06, "loss": 0.8753, "step": 11507 }, { "epoch": 0.4064833170705862, "grad_norm": 1.6783989667892456, "learning_rate": 6.7215327307878184e-06, "loss": 0.8185, "step": 11508 }, { "epoch": 0.4065186388742941, "grad_norm": 1.5615437030792236, "learning_rate": 6.720995683881904e-06, "loss": 0.7618, "step": 11509 }, { "epoch": 0.40655396067800204, "grad_norm": 1.7623106241226196, "learning_rate": 6.72045861445192e-06, "loss": 0.8271, "step": 11510 }, { "epoch": 0.40658928248170995, "grad_norm": 1.7107388973236084, "learning_rate": 6.7199215225049e-06, "loss": 0.8027, "step": 11511 }, { "epoch": 0.40662460428541786, "grad_norm": 2.97123646736145, "learning_rate": 6.7193844080478664e-06, "loss": 0.8215, "step": 11512 }, { "epoch": 0.40665992608912577, "grad_norm": 1.7883886098861694, "learning_rate": 6.718847271087856e-06, "loss": 0.8248, "step": 11513 }, { "epoch": 0.4066952478928336, "grad_norm": 1.71780526638031, "learning_rate": 6.718310111631895e-06, "loss": 0.8451, "step": 11514 }, { "epoch": 0.40673056969654153, "grad_norm": 1.6188015937805176, "learning_rate": 6.717772929687013e-06, "loss": 0.8228, "step": 11515 }, { "epoch": 0.40676589150024944, "grad_norm": 1.877548098564148, "learning_rate": 6.717235725260243e-06, "loss": 0.8292, "step": 11516 }, { "epoch": 0.40680121330395735, "grad_norm": 1.6097726821899414, "learning_rate": 6.716698498358613e-06, "loss": 0.8025, "step": 11517 }, { "epoch": 0.40683653510766526, "grad_norm": 2.574746608734131, "learning_rate": 6.716161248989157e-06, "loss": 0.8127, "step": 11518 }, { "epoch": 0.4068718569113732, "grad_norm": 1.688412070274353, "learning_rate": 6.715623977158906e-06, "loss": 0.7912, "step": 11519 }, { "epoch": 0.4069071787150811, "grad_norm": 1.7716145515441895, "learning_rate": 6.71508668287489e-06, "loss": 0.7907, "step": 11520 }, { "epoch": 0.406942500518789, "grad_norm": 1.6099936962127686, "learning_rate": 6.714549366144143e-06, "loss": 0.7589, "step": 11521 }, { "epoch": 0.4069778223224969, "grad_norm": 1.5934635400772095, "learning_rate": 6.7140120269736955e-06, "loss": 0.8089, "step": 11522 }, { "epoch": 0.4070131441262048, "grad_norm": 1.6088353395462036, "learning_rate": 6.7134746653705795e-06, "loss": 0.8026, "step": 11523 }, { "epoch": 0.40704846592991273, "grad_norm": 1.6560696363449097, "learning_rate": 6.71293728134183e-06, "loss": 0.7875, "step": 11524 }, { "epoch": 0.40708378773362064, "grad_norm": 1.7492009401321411, "learning_rate": 6.71239987489448e-06, "loss": 0.8041, "step": 11525 }, { "epoch": 0.40711910953732855, "grad_norm": 1.820918083190918, "learning_rate": 6.7118624460355606e-06, "loss": 0.8254, "step": 11526 }, { "epoch": 0.4071544313410364, "grad_norm": 1.5385353565216064, "learning_rate": 6.711324994772109e-06, "loss": 0.8029, "step": 11527 }, { "epoch": 0.4071897531447443, "grad_norm": 1.799975872039795, "learning_rate": 6.710787521111157e-06, "loss": 0.7725, "step": 11528 }, { "epoch": 0.4072250749484522, "grad_norm": 1.592856526374817, "learning_rate": 6.7102500250597415e-06, "loss": 0.813, "step": 11529 }, { "epoch": 0.40726039675216014, "grad_norm": 1.7256557941436768, "learning_rate": 6.7097125066248915e-06, "loss": 0.8437, "step": 11530 }, { "epoch": 0.40729571855586805, "grad_norm": 1.7062618732452393, "learning_rate": 6.709174965813648e-06, "loss": 0.8016, "step": 11531 }, { "epoch": 0.40733104035957596, "grad_norm": 1.7633476257324219, "learning_rate": 6.708637402633044e-06, "loss": 0.822, "step": 11532 }, { "epoch": 0.40736636216328387, "grad_norm": 1.6838860511779785, "learning_rate": 6.708099817090112e-06, "loss": 0.8136, "step": 11533 }, { "epoch": 0.4074016839669918, "grad_norm": 1.6349581480026245, "learning_rate": 6.707562209191893e-06, "loss": 0.7941, "step": 11534 }, { "epoch": 0.4074370057706997, "grad_norm": 1.6854803562164307, "learning_rate": 6.707024578945421e-06, "loss": 0.8292, "step": 11535 }, { "epoch": 0.4074723275744076, "grad_norm": 1.6805552244186401, "learning_rate": 6.706486926357731e-06, "loss": 0.7913, "step": 11536 }, { "epoch": 0.4075076493781155, "grad_norm": 1.595900297164917, "learning_rate": 6.7059492514358625e-06, "loss": 0.8278, "step": 11537 }, { "epoch": 0.4075429711818234, "grad_norm": 1.5597155094146729, "learning_rate": 6.70541155418685e-06, "loss": 0.8013, "step": 11538 }, { "epoch": 0.40757829298553133, "grad_norm": 2.147289276123047, "learning_rate": 6.704873834617732e-06, "loss": 0.8073, "step": 11539 }, { "epoch": 0.4076136147892392, "grad_norm": 1.5787129402160645, "learning_rate": 6.704336092735546e-06, "loss": 0.8003, "step": 11540 }, { "epoch": 0.4076489365929471, "grad_norm": 1.543934941291809, "learning_rate": 6.703798328547328e-06, "loss": 0.8133, "step": 11541 }, { "epoch": 0.407684258396655, "grad_norm": 1.7280606031417847, "learning_rate": 6.703260542060119e-06, "loss": 0.8118, "step": 11542 }, { "epoch": 0.4077195802003629, "grad_norm": 1.5843805074691772, "learning_rate": 6.702722733280958e-06, "loss": 0.7924, "step": 11543 }, { "epoch": 0.40775490200407083, "grad_norm": 1.5321966409683228, "learning_rate": 6.702184902216878e-06, "loss": 0.8294, "step": 11544 }, { "epoch": 0.40779022380777874, "grad_norm": 2.0052988529205322, "learning_rate": 6.701647048874924e-06, "loss": 0.8505, "step": 11545 }, { "epoch": 0.40782554561148665, "grad_norm": 1.6902668476104736, "learning_rate": 6.701109173262133e-06, "loss": 0.7741, "step": 11546 }, { "epoch": 0.40786086741519456, "grad_norm": 1.639021396636963, "learning_rate": 6.700571275385546e-06, "loss": 0.7977, "step": 11547 }, { "epoch": 0.40789618921890247, "grad_norm": 1.66306471824646, "learning_rate": 6.700033355252201e-06, "loss": 0.7967, "step": 11548 }, { "epoch": 0.4079315110226104, "grad_norm": 1.6219596862792969, "learning_rate": 6.699495412869138e-06, "loss": 0.8119, "step": 11549 }, { "epoch": 0.4079668328263183, "grad_norm": 1.8841724395751953, "learning_rate": 6.6989574482434e-06, "loss": 0.805, "step": 11550 }, { "epoch": 0.4080021546300262, "grad_norm": 2.0320892333984375, "learning_rate": 6.6984194613820265e-06, "loss": 0.8458, "step": 11551 }, { "epoch": 0.4080374764337341, "grad_norm": 2.135239839553833, "learning_rate": 6.697881452292057e-06, "loss": 0.8129, "step": 11552 }, { "epoch": 0.40807279823744197, "grad_norm": 1.7102876901626587, "learning_rate": 6.6973434209805344e-06, "loss": 0.861, "step": 11553 }, { "epoch": 0.4081081200411499, "grad_norm": 1.8122791051864624, "learning_rate": 6.6968053674545e-06, "loss": 0.8383, "step": 11554 }, { "epoch": 0.4081434418448578, "grad_norm": 1.8075711727142334, "learning_rate": 6.696267291720997e-06, "loss": 0.8227, "step": 11555 }, { "epoch": 0.4081787636485657, "grad_norm": 1.8724421262741089, "learning_rate": 6.695729193787066e-06, "loss": 0.7981, "step": 11556 }, { "epoch": 0.4082140854522736, "grad_norm": 1.732843279838562, "learning_rate": 6.6951910736597506e-06, "loss": 0.8173, "step": 11557 }, { "epoch": 0.4082494072559815, "grad_norm": 1.6741267442703247, "learning_rate": 6.694652931346093e-06, "loss": 0.8191, "step": 11558 }, { "epoch": 0.40828472905968943, "grad_norm": 1.7841025590896606, "learning_rate": 6.6941147668531345e-06, "loss": 0.8153, "step": 11559 }, { "epoch": 0.40832005086339734, "grad_norm": 1.8501899242401123, "learning_rate": 6.693576580187923e-06, "loss": 0.7902, "step": 11560 }, { "epoch": 0.40835537266710525, "grad_norm": 1.7301801443099976, "learning_rate": 6.693038371357498e-06, "loss": 0.8231, "step": 11561 }, { "epoch": 0.40839069447081316, "grad_norm": 1.709693431854248, "learning_rate": 6.692500140368905e-06, "loss": 0.8294, "step": 11562 }, { "epoch": 0.4084260162745211, "grad_norm": 1.9554145336151123, "learning_rate": 6.69196188722919e-06, "loss": 0.8679, "step": 11563 }, { "epoch": 0.408461338078229, "grad_norm": 1.6466292142868042, "learning_rate": 6.691423611945395e-06, "loss": 0.8554, "step": 11564 }, { "epoch": 0.4084966598819369, "grad_norm": 1.6296898126602173, "learning_rate": 6.690885314524565e-06, "loss": 0.8219, "step": 11565 }, { "epoch": 0.40853198168564475, "grad_norm": 1.9247442483901978, "learning_rate": 6.690346994973747e-06, "loss": 0.8183, "step": 11566 }, { "epoch": 0.40856730348935266, "grad_norm": 1.6173996925354004, "learning_rate": 6.689808653299984e-06, "loss": 0.8738, "step": 11567 }, { "epoch": 0.4086026252930606, "grad_norm": 1.7120792865753174, "learning_rate": 6.6892702895103235e-06, "loss": 0.8066, "step": 11568 }, { "epoch": 0.4086379470967685, "grad_norm": 1.5705082416534424, "learning_rate": 6.688731903611811e-06, "loss": 0.8427, "step": 11569 }, { "epoch": 0.4086732689004764, "grad_norm": 1.4758622646331787, "learning_rate": 6.688193495611492e-06, "loss": 0.8171, "step": 11570 }, { "epoch": 0.4087085907041843, "grad_norm": 1.6261299848556519, "learning_rate": 6.687655065516416e-06, "loss": 0.8144, "step": 11571 }, { "epoch": 0.4087439125078922, "grad_norm": 1.5936695337295532, "learning_rate": 6.687116613333626e-06, "loss": 0.802, "step": 11572 }, { "epoch": 0.4087792343116001, "grad_norm": 1.820560097694397, "learning_rate": 6.686578139070171e-06, "loss": 0.8345, "step": 11573 }, { "epoch": 0.40881455611530804, "grad_norm": 1.6556565761566162, "learning_rate": 6.6860396427331e-06, "loss": 0.7982, "step": 11574 }, { "epoch": 0.40884987791901595, "grad_norm": 1.6337742805480957, "learning_rate": 6.685501124329458e-06, "loss": 0.7986, "step": 11575 }, { "epoch": 0.40888519972272386, "grad_norm": 2.1593074798583984, "learning_rate": 6.6849625838662935e-06, "loss": 0.809, "step": 11576 }, { "epoch": 0.40892052152643177, "grad_norm": 1.5737680196762085, "learning_rate": 6.684424021350656e-06, "loss": 0.813, "step": 11577 }, { "epoch": 0.4089558433301397, "grad_norm": 3.173532485961914, "learning_rate": 6.683885436789594e-06, "loss": 0.8214, "step": 11578 }, { "epoch": 0.40899116513384753, "grad_norm": 1.7114338874816895, "learning_rate": 6.683346830190155e-06, "loss": 0.8078, "step": 11579 }, { "epoch": 0.40902648693755544, "grad_norm": 1.7270091772079468, "learning_rate": 6.68280820155939e-06, "loss": 0.8449, "step": 11580 }, { "epoch": 0.40906180874126336, "grad_norm": 1.8523743152618408, "learning_rate": 6.682269550904346e-06, "loss": 0.7866, "step": 11581 }, { "epoch": 0.40909713054497127, "grad_norm": 1.9654406309127808, "learning_rate": 6.6817308782320755e-06, "loss": 0.7918, "step": 11582 }, { "epoch": 0.4091324523486792, "grad_norm": 1.5900589227676392, "learning_rate": 6.681192183549628e-06, "loss": 0.7992, "step": 11583 }, { "epoch": 0.4091677741523871, "grad_norm": 1.8418771028518677, "learning_rate": 6.680653466864051e-06, "loss": 0.8112, "step": 11584 }, { "epoch": 0.409203095956095, "grad_norm": 1.6680852174758911, "learning_rate": 6.680114728182401e-06, "loss": 0.7922, "step": 11585 }, { "epoch": 0.4092384177598029, "grad_norm": 1.8217675685882568, "learning_rate": 6.679575967511722e-06, "loss": 0.82, "step": 11586 }, { "epoch": 0.4092737395635108, "grad_norm": 2.0257346630096436, "learning_rate": 6.679037184859071e-06, "loss": 0.7965, "step": 11587 }, { "epoch": 0.40930906136721873, "grad_norm": 1.557686686515808, "learning_rate": 6.678498380231494e-06, "loss": 0.7818, "step": 11588 }, { "epoch": 0.40934438317092664, "grad_norm": 1.72059965133667, "learning_rate": 6.677959553636048e-06, "loss": 0.8325, "step": 11589 }, { "epoch": 0.40937970497463455, "grad_norm": 1.7376776933670044, "learning_rate": 6.677420705079783e-06, "loss": 0.8196, "step": 11590 }, { "epoch": 0.40941502677834246, "grad_norm": 1.5681132078170776, "learning_rate": 6.676881834569749e-06, "loss": 0.7727, "step": 11591 }, { "epoch": 0.4094503485820503, "grad_norm": 1.5663338899612427, "learning_rate": 6.676342942113002e-06, "loss": 0.8127, "step": 11592 }, { "epoch": 0.4094856703857582, "grad_norm": 1.54071843624115, "learning_rate": 6.675804027716595e-06, "loss": 0.7732, "step": 11593 }, { "epoch": 0.40952099218946614, "grad_norm": 1.650538682937622, "learning_rate": 6.6752650913875785e-06, "loss": 0.8062, "step": 11594 }, { "epoch": 0.40955631399317405, "grad_norm": 1.6912627220153809, "learning_rate": 6.6747261331330075e-06, "loss": 0.8134, "step": 11595 }, { "epoch": 0.40959163579688196, "grad_norm": 1.5885220766067505, "learning_rate": 6.674187152959934e-06, "loss": 0.8462, "step": 11596 }, { "epoch": 0.40962695760058987, "grad_norm": 1.73720383644104, "learning_rate": 6.673648150875417e-06, "loss": 0.823, "step": 11597 }, { "epoch": 0.4096622794042978, "grad_norm": 1.6502903699874878, "learning_rate": 6.6731091268865055e-06, "loss": 0.8471, "step": 11598 }, { "epoch": 0.4096976012080057, "grad_norm": 1.706972360610962, "learning_rate": 6.672570081000257e-06, "loss": 0.7946, "step": 11599 }, { "epoch": 0.4097329230117136, "grad_norm": 1.804958462715149, "learning_rate": 6.6720310132237255e-06, "loss": 0.7522, "step": 11600 }, { "epoch": 0.4097682448154215, "grad_norm": 1.718842625617981, "learning_rate": 6.671491923563966e-06, "loss": 0.7798, "step": 11601 }, { "epoch": 0.4098035666191294, "grad_norm": 1.6720554828643799, "learning_rate": 6.670952812028036e-06, "loss": 0.8165, "step": 11602 }, { "epoch": 0.40983888842283733, "grad_norm": 1.5979688167572021, "learning_rate": 6.670413678622989e-06, "loss": 0.8259, "step": 11603 }, { "epoch": 0.40987421022654524, "grad_norm": 1.920091986656189, "learning_rate": 6.66987452335588e-06, "loss": 0.7961, "step": 11604 }, { "epoch": 0.4099095320302531, "grad_norm": 2.0027763843536377, "learning_rate": 6.669335346233769e-06, "loss": 0.8554, "step": 11605 }, { "epoch": 0.409944853833961, "grad_norm": 1.5390695333480835, "learning_rate": 6.668796147263709e-06, "loss": 0.8151, "step": 11606 }, { "epoch": 0.4099801756376689, "grad_norm": 1.5007457733154297, "learning_rate": 6.668256926452761e-06, "loss": 0.8034, "step": 11607 }, { "epoch": 0.41001549744137683, "grad_norm": 1.5830085277557373, "learning_rate": 6.667717683807979e-06, "loss": 0.8148, "step": 11608 }, { "epoch": 0.41005081924508474, "grad_norm": 1.6243140697479248, "learning_rate": 6.667178419336422e-06, "loss": 0.7874, "step": 11609 }, { "epoch": 0.41008614104879265, "grad_norm": 1.7366200685501099, "learning_rate": 6.666639133045145e-06, "loss": 0.8431, "step": 11610 }, { "epoch": 0.41012146285250056, "grad_norm": 2.033508062362671, "learning_rate": 6.66609982494121e-06, "loss": 0.8203, "step": 11611 }, { "epoch": 0.4101567846562085, "grad_norm": 1.689969778060913, "learning_rate": 6.6655604950316745e-06, "loss": 0.8143, "step": 11612 }, { "epoch": 0.4101921064599164, "grad_norm": 1.5723025798797607, "learning_rate": 6.665021143323595e-06, "loss": 0.8082, "step": 11613 }, { "epoch": 0.4102274282636243, "grad_norm": 1.6317143440246582, "learning_rate": 6.664481769824034e-06, "loss": 0.8042, "step": 11614 }, { "epoch": 0.4102627500673322, "grad_norm": 1.6435492038726807, "learning_rate": 6.663942374540044e-06, "loss": 0.8165, "step": 11615 }, { "epoch": 0.4102980718710401, "grad_norm": 1.6835753917694092, "learning_rate": 6.663402957478693e-06, "loss": 0.8, "step": 11616 }, { "epoch": 0.410333393674748, "grad_norm": 0.9075599312782288, "learning_rate": 6.662863518647035e-06, "loss": 0.5839, "step": 11617 }, { "epoch": 0.4103687154784559, "grad_norm": 1.6915326118469238, "learning_rate": 6.662324058052132e-06, "loss": 0.7988, "step": 11618 }, { "epoch": 0.4104040372821638, "grad_norm": 1.6602263450622559, "learning_rate": 6.661784575701044e-06, "loss": 0.7864, "step": 11619 }, { "epoch": 0.4104393590858717, "grad_norm": 1.583286166191101, "learning_rate": 6.661245071600832e-06, "loss": 0.8168, "step": 11620 }, { "epoch": 0.4104746808895796, "grad_norm": 1.6228725910186768, "learning_rate": 6.660705545758559e-06, "loss": 0.837, "step": 11621 }, { "epoch": 0.4105100026932875, "grad_norm": 1.6164637804031372, "learning_rate": 6.660165998181282e-06, "loss": 0.7947, "step": 11622 }, { "epoch": 0.41054532449699543, "grad_norm": 1.6198856830596924, "learning_rate": 6.6596264288760645e-06, "loss": 0.8326, "step": 11623 }, { "epoch": 0.41058064630070334, "grad_norm": 2.3440001010894775, "learning_rate": 6.659086837849969e-06, "loss": 0.7839, "step": 11624 }, { "epoch": 0.41061596810441126, "grad_norm": 1.738961935043335, "learning_rate": 6.658547225110056e-06, "loss": 0.8189, "step": 11625 }, { "epoch": 0.41065128990811917, "grad_norm": 1.5872496366500854, "learning_rate": 6.658007590663389e-06, "loss": 0.7691, "step": 11626 }, { "epoch": 0.4106866117118271, "grad_norm": 1.7287883758544922, "learning_rate": 6.65746793451703e-06, "loss": 0.7951, "step": 11627 }, { "epoch": 0.410721933515535, "grad_norm": 1.5848904848098755, "learning_rate": 6.656928256678043e-06, "loss": 0.809, "step": 11628 }, { "epoch": 0.4107572553192429, "grad_norm": 1.7545244693756104, "learning_rate": 6.656388557153489e-06, "loss": 0.8386, "step": 11629 }, { "epoch": 0.4107925771229508, "grad_norm": 1.9656753540039062, "learning_rate": 6.655848835950435e-06, "loss": 0.8084, "step": 11630 }, { "epoch": 0.41082789892665866, "grad_norm": 1.7661725282669067, "learning_rate": 6.655309093075941e-06, "loss": 0.8294, "step": 11631 }, { "epoch": 0.4108632207303666, "grad_norm": 1.7242616415023804, "learning_rate": 6.654769328537074e-06, "loss": 0.814, "step": 11632 }, { "epoch": 0.4108985425340745, "grad_norm": 1.6410521268844604, "learning_rate": 6.654229542340896e-06, "loss": 0.7929, "step": 11633 }, { "epoch": 0.4109338643377824, "grad_norm": 1.8869500160217285, "learning_rate": 6.653689734494473e-06, "loss": 0.7962, "step": 11634 }, { "epoch": 0.4109691861414903, "grad_norm": 1.7179325819015503, "learning_rate": 6.6531499050048696e-06, "loss": 0.7556, "step": 11635 }, { "epoch": 0.4110045079451982, "grad_norm": 1.7778568267822266, "learning_rate": 6.652610053879152e-06, "loss": 0.8031, "step": 11636 }, { "epoch": 0.4110398297489061, "grad_norm": 1.5593396425247192, "learning_rate": 6.652070181124384e-06, "loss": 0.811, "step": 11637 }, { "epoch": 0.41107515155261404, "grad_norm": 1.7158658504486084, "learning_rate": 6.651530286747632e-06, "loss": 0.7779, "step": 11638 }, { "epoch": 0.41111047335632195, "grad_norm": 1.9676024913787842, "learning_rate": 6.650990370755961e-06, "loss": 0.8356, "step": 11639 }, { "epoch": 0.41114579516002986, "grad_norm": 1.813773274421692, "learning_rate": 6.65045043315644e-06, "loss": 0.8227, "step": 11640 }, { "epoch": 0.41118111696373777, "grad_norm": 1.7282110452651978, "learning_rate": 6.649910473956132e-06, "loss": 0.8186, "step": 11641 }, { "epoch": 0.4112164387674457, "grad_norm": 1.758832573890686, "learning_rate": 6.649370493162108e-06, "loss": 0.8312, "step": 11642 }, { "epoch": 0.4112517605711536, "grad_norm": 1.8201262950897217, "learning_rate": 6.648830490781433e-06, "loss": 0.8181, "step": 11643 }, { "epoch": 0.41128708237486145, "grad_norm": 1.8409700393676758, "learning_rate": 6.6482904668211725e-06, "loss": 0.8253, "step": 11644 }, { "epoch": 0.41132240417856936, "grad_norm": 1.6551975011825562, "learning_rate": 6.647750421288398e-06, "loss": 0.8452, "step": 11645 }, { "epoch": 0.41135772598227727, "grad_norm": 2.3325865268707275, "learning_rate": 6.647210354190175e-06, "loss": 0.8057, "step": 11646 }, { "epoch": 0.4113930477859852, "grad_norm": 1.7014939785003662, "learning_rate": 6.646670265533573e-06, "loss": 0.8144, "step": 11647 }, { "epoch": 0.4114283695896931, "grad_norm": 1.6766520738601685, "learning_rate": 6.646130155325659e-06, "loss": 0.8359, "step": 11648 }, { "epoch": 0.411463691393401, "grad_norm": 1.8802186250686646, "learning_rate": 6.6455900235735034e-06, "loss": 0.7832, "step": 11649 }, { "epoch": 0.4114990131971089, "grad_norm": 3.002131700515747, "learning_rate": 6.645049870284176e-06, "loss": 0.8082, "step": 11650 }, { "epoch": 0.4115343350008168, "grad_norm": 2.0217645168304443, "learning_rate": 6.644509695464744e-06, "loss": 0.8374, "step": 11651 }, { "epoch": 0.41156965680452473, "grad_norm": 1.590578317642212, "learning_rate": 6.643969499122278e-06, "loss": 0.8257, "step": 11652 }, { "epoch": 0.41160497860823264, "grad_norm": 1.7190569639205933, "learning_rate": 6.64342928126385e-06, "loss": 0.7918, "step": 11653 }, { "epoch": 0.41164030041194055, "grad_norm": 1.6084619760513306, "learning_rate": 6.642889041896527e-06, "loss": 0.8062, "step": 11654 }, { "epoch": 0.41167562221564846, "grad_norm": 1.7026503086090088, "learning_rate": 6.642348781027381e-06, "loss": 0.8327, "step": 11655 }, { "epoch": 0.4117109440193564, "grad_norm": 1.76325523853302, "learning_rate": 6.641808498663483e-06, "loss": 0.8123, "step": 11656 }, { "epoch": 0.41174626582306423, "grad_norm": 1.7355852127075195, "learning_rate": 6.641268194811904e-06, "loss": 0.819, "step": 11657 }, { "epoch": 0.41178158762677214, "grad_norm": 1.466806411743164, "learning_rate": 6.640727869479716e-06, "loss": 0.8335, "step": 11658 }, { "epoch": 0.41181690943048005, "grad_norm": 1.5436155796051025, "learning_rate": 6.64018752267399e-06, "loss": 0.8219, "step": 11659 }, { "epoch": 0.41185223123418796, "grad_norm": 1.736846923828125, "learning_rate": 6.639647154401798e-06, "loss": 0.8095, "step": 11660 }, { "epoch": 0.41188755303789587, "grad_norm": 1.6723108291625977, "learning_rate": 6.639106764670212e-06, "loss": 0.8223, "step": 11661 }, { "epoch": 0.4119228748416038, "grad_norm": 3.4251749515533447, "learning_rate": 6.638566353486304e-06, "loss": 0.7582, "step": 11662 }, { "epoch": 0.4119581966453117, "grad_norm": 1.7047808170318604, "learning_rate": 6.63802592085715e-06, "loss": 0.8068, "step": 11663 }, { "epoch": 0.4119935184490196, "grad_norm": 1.5469093322753906, "learning_rate": 6.637485466789818e-06, "loss": 0.7953, "step": 11664 }, { "epoch": 0.4120288402527275, "grad_norm": 1.5673013925552368, "learning_rate": 6.6369449912913855e-06, "loss": 0.7843, "step": 11665 }, { "epoch": 0.4120641620564354, "grad_norm": 1.9565377235412598, "learning_rate": 6.636404494368924e-06, "loss": 0.7714, "step": 11666 }, { "epoch": 0.41209948386014333, "grad_norm": 1.6693387031555176, "learning_rate": 6.635863976029508e-06, "loss": 0.7874, "step": 11667 }, { "epoch": 0.41213480566385124, "grad_norm": 1.6442726850509644, "learning_rate": 6.635323436280212e-06, "loss": 0.8076, "step": 11668 }, { "epoch": 0.41217012746755916, "grad_norm": 1.689563274383545, "learning_rate": 6.634782875128111e-06, "loss": 0.7856, "step": 11669 }, { "epoch": 0.412205449271267, "grad_norm": 1.2486039400100708, "learning_rate": 6.634242292580277e-06, "loss": 0.618, "step": 11670 }, { "epoch": 0.4122407710749749, "grad_norm": 1.629000186920166, "learning_rate": 6.63370168864379e-06, "loss": 0.8053, "step": 11671 }, { "epoch": 0.41227609287868283, "grad_norm": 1.703914761543274, "learning_rate": 6.633161063325722e-06, "loss": 0.8046, "step": 11672 }, { "epoch": 0.41231141468239074, "grad_norm": 1.6769826412200928, "learning_rate": 6.632620416633146e-06, "loss": 0.8234, "step": 11673 }, { "epoch": 0.41234673648609865, "grad_norm": 1.5754847526550293, "learning_rate": 6.632079748573143e-06, "loss": 0.8028, "step": 11674 }, { "epoch": 0.41238205828980656, "grad_norm": 1.8229103088378906, "learning_rate": 6.631539059152786e-06, "loss": 0.7966, "step": 11675 }, { "epoch": 0.4124173800935145, "grad_norm": 1.6022919416427612, "learning_rate": 6.630998348379154e-06, "loss": 0.806, "step": 11676 }, { "epoch": 0.4124527018972224, "grad_norm": 1.563090205192566, "learning_rate": 6.630457616259322e-06, "loss": 0.7968, "step": 11677 }, { "epoch": 0.4124880237009303, "grad_norm": 0.9725398421287537, "learning_rate": 6.629916862800366e-06, "loss": 0.5881, "step": 11678 }, { "epoch": 0.4125233455046382, "grad_norm": 1.8066965341567993, "learning_rate": 6.629376088009366e-06, "loss": 0.798, "step": 11679 }, { "epoch": 0.4125586673083461, "grad_norm": 2.0384833812713623, "learning_rate": 6.628835291893396e-06, "loss": 0.8009, "step": 11680 }, { "epoch": 0.412593989112054, "grad_norm": 1.731325387954712, "learning_rate": 6.628294474459538e-06, "loss": 0.8127, "step": 11681 }, { "epoch": 0.41262931091576194, "grad_norm": 1.6700679063796997, "learning_rate": 6.627753635714868e-06, "loss": 0.803, "step": 11682 }, { "epoch": 0.4126646327194698, "grad_norm": 1.755223035812378, "learning_rate": 6.627212775666463e-06, "loss": 0.801, "step": 11683 }, { "epoch": 0.4126999545231777, "grad_norm": 1.731977939605713, "learning_rate": 6.626671894321404e-06, "loss": 0.7873, "step": 11684 }, { "epoch": 0.4127352763268856, "grad_norm": 1.766303539276123, "learning_rate": 6.626130991686768e-06, "loss": 0.8418, "step": 11685 }, { "epoch": 0.4127705981305935, "grad_norm": 1.587828516960144, "learning_rate": 6.625590067769636e-06, "loss": 0.794, "step": 11686 }, { "epoch": 0.41280591993430144, "grad_norm": 1.6744791269302368, "learning_rate": 6.625049122577087e-06, "loss": 0.8365, "step": 11687 }, { "epoch": 0.41284124173800935, "grad_norm": 1.8517063856124878, "learning_rate": 6.6245081561162e-06, "loss": 0.8223, "step": 11688 }, { "epoch": 0.41287656354171726, "grad_norm": 2.0742549896240234, "learning_rate": 6.623967168394057e-06, "loss": 0.806, "step": 11689 }, { "epoch": 0.41291188534542517, "grad_norm": 1.8126797676086426, "learning_rate": 6.623426159417735e-06, "loss": 0.8089, "step": 11690 }, { "epoch": 0.4129472071491331, "grad_norm": 1.5916272401809692, "learning_rate": 6.6228851291943164e-06, "loss": 0.8281, "step": 11691 }, { "epoch": 0.412982528952841, "grad_norm": 1.725393295288086, "learning_rate": 6.622344077730884e-06, "loss": 0.8065, "step": 11692 }, { "epoch": 0.4130178507565489, "grad_norm": 1.8782325983047485, "learning_rate": 6.621803005034517e-06, "loss": 0.7974, "step": 11693 }, { "epoch": 0.4130531725602568, "grad_norm": 1.7525746822357178, "learning_rate": 6.6212619111122954e-06, "loss": 0.8047, "step": 11694 }, { "epoch": 0.4130884943639647, "grad_norm": 1.9248119592666626, "learning_rate": 6.620720795971306e-06, "loss": 0.7842, "step": 11695 }, { "epoch": 0.4131238161676726, "grad_norm": 1.7135206460952759, "learning_rate": 6.620179659618624e-06, "loss": 0.8376, "step": 11696 }, { "epoch": 0.4131591379713805, "grad_norm": 1.5103087425231934, "learning_rate": 6.619638502061337e-06, "loss": 0.7721, "step": 11697 }, { "epoch": 0.4131944597750884, "grad_norm": 1.6351805925369263, "learning_rate": 6.619097323306527e-06, "loss": 0.7611, "step": 11698 }, { "epoch": 0.4132297815787963, "grad_norm": 1.9550305604934692, "learning_rate": 6.618556123361273e-06, "loss": 0.8285, "step": 11699 }, { "epoch": 0.4132651033825042, "grad_norm": 1.6513831615447998, "learning_rate": 6.618014902232661e-06, "loss": 0.8063, "step": 11700 }, { "epoch": 0.41330042518621213, "grad_norm": 1.611197590827942, "learning_rate": 6.6174736599277755e-06, "loss": 0.7638, "step": 11701 }, { "epoch": 0.41333574698992004, "grad_norm": 1.6498976945877075, "learning_rate": 6.616932396453698e-06, "loss": 0.8284, "step": 11702 }, { "epoch": 0.41337106879362795, "grad_norm": 1.5951019525527954, "learning_rate": 6.6163911118175125e-06, "loss": 0.8022, "step": 11703 }, { "epoch": 0.41340639059733586, "grad_norm": 1.631239891052246, "learning_rate": 6.615849806026305e-06, "loss": 0.8266, "step": 11704 }, { "epoch": 0.41344171240104377, "grad_norm": 1.7551053762435913, "learning_rate": 6.6153084790871605e-06, "loss": 0.7605, "step": 11705 }, { "epoch": 0.4134770342047517, "grad_norm": 1.7387912273406982, "learning_rate": 6.61476713100716e-06, "loss": 0.8169, "step": 11706 }, { "epoch": 0.4135123560084596, "grad_norm": 1.7294986248016357, "learning_rate": 6.614225761793391e-06, "loss": 0.8135, "step": 11707 }, { "epoch": 0.4135476778121675, "grad_norm": 1.7654248476028442, "learning_rate": 6.61368437145294e-06, "loss": 0.815, "step": 11708 }, { "epoch": 0.41358299961587536, "grad_norm": 1.6817408800125122, "learning_rate": 6.613142959992889e-06, "loss": 0.8156, "step": 11709 }, { "epoch": 0.41361832141958327, "grad_norm": 1.740054726600647, "learning_rate": 6.612601527420328e-06, "loss": 0.8419, "step": 11710 }, { "epoch": 0.4136536432232912, "grad_norm": 1.8712279796600342, "learning_rate": 6.6120600737423415e-06, "loss": 0.8167, "step": 11711 }, { "epoch": 0.4136889650269991, "grad_norm": 2.3925669193267822, "learning_rate": 6.611518598966015e-06, "loss": 0.8153, "step": 11712 }, { "epoch": 0.413724286830707, "grad_norm": 1.7477329969406128, "learning_rate": 6.610977103098436e-06, "loss": 0.8211, "step": 11713 }, { "epoch": 0.4137596086344149, "grad_norm": 1.5757815837860107, "learning_rate": 6.610435586146692e-06, "loss": 0.8159, "step": 11714 }, { "epoch": 0.4137949304381228, "grad_norm": 1.8281084299087524, "learning_rate": 6.609894048117869e-06, "loss": 0.7942, "step": 11715 }, { "epoch": 0.41383025224183073, "grad_norm": 1.617150068283081, "learning_rate": 6.609352489019056e-06, "loss": 0.7965, "step": 11716 }, { "epoch": 0.41386557404553864, "grad_norm": 1.7396445274353027, "learning_rate": 6.608810908857341e-06, "loss": 0.8702, "step": 11717 }, { "epoch": 0.41390089584924655, "grad_norm": 1.7510920763015747, "learning_rate": 6.608269307639809e-06, "loss": 0.8198, "step": 11718 }, { "epoch": 0.41393621765295446, "grad_norm": 2.0613884925842285, "learning_rate": 6.607727685373553e-06, "loss": 0.837, "step": 11719 }, { "epoch": 0.4139715394566624, "grad_norm": 1.6432915925979614, "learning_rate": 6.607186042065658e-06, "loss": 0.815, "step": 11720 }, { "epoch": 0.4140068612603703, "grad_norm": 1.590525507926941, "learning_rate": 6.606644377723214e-06, "loss": 0.7966, "step": 11721 }, { "epoch": 0.41404218306407814, "grad_norm": 1.6257137060165405, "learning_rate": 6.606102692353311e-06, "loss": 0.7873, "step": 11722 }, { "epoch": 0.41407750486778605, "grad_norm": 1.766489028930664, "learning_rate": 6.605560985963038e-06, "loss": 0.8467, "step": 11723 }, { "epoch": 0.41411282667149396, "grad_norm": 2.4395666122436523, "learning_rate": 6.605019258559484e-06, "loss": 0.8094, "step": 11724 }, { "epoch": 0.41414814847520187, "grad_norm": 1.5712395906448364, "learning_rate": 6.604477510149739e-06, "loss": 0.828, "step": 11725 }, { "epoch": 0.4141834702789098, "grad_norm": 1.6087067127227783, "learning_rate": 6.6039357407408955e-06, "loss": 0.8019, "step": 11726 }, { "epoch": 0.4142187920826177, "grad_norm": 1.680315375328064, "learning_rate": 6.603393950340043e-06, "loss": 0.8339, "step": 11727 }, { "epoch": 0.4142541138863256, "grad_norm": 1.7682405710220337, "learning_rate": 6.60285213895427e-06, "loss": 0.8461, "step": 11728 }, { "epoch": 0.4142894356900335, "grad_norm": 1.6903244256973267, "learning_rate": 6.60231030659067e-06, "loss": 0.8131, "step": 11729 }, { "epoch": 0.4143247574937414, "grad_norm": 1.4700970649719238, "learning_rate": 6.601768453256334e-06, "loss": 0.805, "step": 11730 }, { "epoch": 0.41436007929744934, "grad_norm": 1.5432024002075195, "learning_rate": 6.601226578958353e-06, "loss": 0.8207, "step": 11731 }, { "epoch": 0.41439540110115725, "grad_norm": 1.62462317943573, "learning_rate": 6.6006846837038215e-06, "loss": 0.8402, "step": 11732 }, { "epoch": 0.41443072290486516, "grad_norm": 1.6944996118545532, "learning_rate": 6.600142767499829e-06, "loss": 0.8012, "step": 11733 }, { "epoch": 0.41446604470857307, "grad_norm": 1.6651906967163086, "learning_rate": 6.599600830353468e-06, "loss": 0.828, "step": 11734 }, { "epoch": 0.4145013665122809, "grad_norm": 1.7657140493392944, "learning_rate": 6.599058872271833e-06, "loss": 0.8252, "step": 11735 }, { "epoch": 0.41453668831598883, "grad_norm": 1.5315513610839844, "learning_rate": 6.598516893262015e-06, "loss": 0.782, "step": 11736 }, { "epoch": 0.41457201011969674, "grad_norm": 1.64136803150177, "learning_rate": 6.597974893331108e-06, "loss": 0.809, "step": 11737 }, { "epoch": 0.41460733192340465, "grad_norm": 1.9034433364868164, "learning_rate": 6.597432872486207e-06, "loss": 0.8065, "step": 11738 }, { "epoch": 0.41464265372711256, "grad_norm": 1.8330461978912354, "learning_rate": 6.596890830734404e-06, "loss": 0.8143, "step": 11739 }, { "epoch": 0.4146779755308205, "grad_norm": 1.6785720586776733, "learning_rate": 6.5963487680827955e-06, "loss": 0.7874, "step": 11740 }, { "epoch": 0.4147132973345284, "grad_norm": 1.6285624504089355, "learning_rate": 6.595806684538473e-06, "loss": 0.8249, "step": 11741 }, { "epoch": 0.4147486191382363, "grad_norm": 1.661611557006836, "learning_rate": 6.595264580108534e-06, "loss": 0.7962, "step": 11742 }, { "epoch": 0.4147839409419442, "grad_norm": 3.34320330619812, "learning_rate": 6.59472245480007e-06, "loss": 0.7973, "step": 11743 }, { "epoch": 0.4148192627456521, "grad_norm": 1.996195912361145, "learning_rate": 6.594180308620179e-06, "loss": 0.8034, "step": 11744 }, { "epoch": 0.41485458454936003, "grad_norm": 1.70701003074646, "learning_rate": 6.593638141575958e-06, "loss": 0.8171, "step": 11745 }, { "epoch": 0.41488990635306794, "grad_norm": 1.6702710390090942, "learning_rate": 6.593095953674498e-06, "loss": 0.8332, "step": 11746 }, { "epoch": 0.41492522815677585, "grad_norm": 1.9137789011001587, "learning_rate": 6.5925537449228984e-06, "loss": 0.7738, "step": 11747 }, { "epoch": 0.4149605499604837, "grad_norm": 1.6316440105438232, "learning_rate": 6.592011515328255e-06, "loss": 0.8162, "step": 11748 }, { "epoch": 0.4149958717641916, "grad_norm": 1.6918760538101196, "learning_rate": 6.591469264897663e-06, "loss": 0.7751, "step": 11749 }, { "epoch": 0.4150311935678995, "grad_norm": 1.5796934366226196, "learning_rate": 6.590926993638221e-06, "loss": 0.788, "step": 11750 }, { "epoch": 0.41506651537160744, "grad_norm": 1.6835078001022339, "learning_rate": 6.5903847015570266e-06, "loss": 0.8048, "step": 11751 }, { "epoch": 0.41510183717531535, "grad_norm": 1.6073428392410278, "learning_rate": 6.589842388661176e-06, "loss": 0.8364, "step": 11752 }, { "epoch": 0.41513715897902326, "grad_norm": 1.5737695693969727, "learning_rate": 6.5893000549577666e-06, "loss": 0.8339, "step": 11753 }, { "epoch": 0.41517248078273117, "grad_norm": 1.825904369354248, "learning_rate": 6.588757700453896e-06, "loss": 0.8129, "step": 11754 }, { "epoch": 0.4152078025864391, "grad_norm": 1.574249505996704, "learning_rate": 6.588215325156665e-06, "loss": 0.7851, "step": 11755 }, { "epoch": 0.415243124390147, "grad_norm": 1.6720401048660278, "learning_rate": 6.58767292907317e-06, "loss": 0.7898, "step": 11756 }, { "epoch": 0.4152784461938549, "grad_norm": 1.6079155206680298, "learning_rate": 6.587130512210509e-06, "loss": 0.7931, "step": 11757 }, { "epoch": 0.4153137679975628, "grad_norm": 1.8056670427322388, "learning_rate": 6.586588074575783e-06, "loss": 0.8235, "step": 11758 }, { "epoch": 0.4153490898012707, "grad_norm": 1.635209560394287, "learning_rate": 6.586045616176091e-06, "loss": 0.8281, "step": 11759 }, { "epoch": 0.41538441160497863, "grad_norm": 1.8871623277664185, "learning_rate": 6.585503137018532e-06, "loss": 0.7823, "step": 11760 }, { "epoch": 0.41541973340868654, "grad_norm": 1.8963661193847656, "learning_rate": 6.584960637110205e-06, "loss": 0.802, "step": 11761 }, { "epoch": 0.4154550552123944, "grad_norm": 1.6191892623901367, "learning_rate": 6.584418116458211e-06, "loss": 0.8018, "step": 11762 }, { "epoch": 0.4154903770161023, "grad_norm": 1.5159927606582642, "learning_rate": 6.583875575069651e-06, "loss": 0.7774, "step": 11763 }, { "epoch": 0.4155256988198102, "grad_norm": 1.6137598752975464, "learning_rate": 6.583333012951626e-06, "loss": 0.7935, "step": 11764 }, { "epoch": 0.41556102062351813, "grad_norm": 1.6904559135437012, "learning_rate": 6.582790430111234e-06, "loss": 0.7692, "step": 11765 }, { "epoch": 0.41559634242722604, "grad_norm": 1.6430041790008545, "learning_rate": 6.5822478265555815e-06, "loss": 0.8011, "step": 11766 }, { "epoch": 0.41563166423093395, "grad_norm": 1.6224493980407715, "learning_rate": 6.581705202291764e-06, "loss": 0.8065, "step": 11767 }, { "epoch": 0.41566698603464186, "grad_norm": 1.4954997301101685, "learning_rate": 6.581162557326888e-06, "loss": 0.8323, "step": 11768 }, { "epoch": 0.41570230783834977, "grad_norm": 1.5853170156478882, "learning_rate": 6.5806198916680545e-06, "loss": 0.859, "step": 11769 }, { "epoch": 0.4157376296420577, "grad_norm": 2.1277835369110107, "learning_rate": 6.580077205322361e-06, "loss": 0.8019, "step": 11770 }, { "epoch": 0.4157729514457656, "grad_norm": 1.6158355474472046, "learning_rate": 6.579534498296918e-06, "loss": 0.8028, "step": 11771 }, { "epoch": 0.4158082732494735, "grad_norm": 1.647603154182434, "learning_rate": 6.578991770598822e-06, "loss": 0.8341, "step": 11772 }, { "epoch": 0.4158435950531814, "grad_norm": 1.6395493745803833, "learning_rate": 6.57844902223518e-06, "loss": 0.8162, "step": 11773 }, { "epoch": 0.4158789168568893, "grad_norm": 1.7548316717147827, "learning_rate": 6.577906253213093e-06, "loss": 0.8393, "step": 11774 }, { "epoch": 0.4159142386605972, "grad_norm": 1.7201379537582397, "learning_rate": 6.577363463539666e-06, "loss": 0.8128, "step": 11775 }, { "epoch": 0.4159495604643051, "grad_norm": 1.5036869049072266, "learning_rate": 6.576820653222003e-06, "loss": 0.7893, "step": 11776 }, { "epoch": 0.415984882268013, "grad_norm": 1.525123119354248, "learning_rate": 6.576277822267208e-06, "loss": 0.8148, "step": 11777 }, { "epoch": 0.4160202040717209, "grad_norm": 0.9770811200141907, "learning_rate": 6.575734970682383e-06, "loss": 0.5844, "step": 11778 }, { "epoch": 0.4160555258754288, "grad_norm": 1.6166620254516602, "learning_rate": 6.5751920984746375e-06, "loss": 0.8148, "step": 11779 }, { "epoch": 0.41609084767913673, "grad_norm": 1.6959893703460693, "learning_rate": 6.574649205651072e-06, "loss": 0.8215, "step": 11780 }, { "epoch": 0.41612616948284464, "grad_norm": 1.606773853302002, "learning_rate": 6.574106292218795e-06, "loss": 0.8117, "step": 11781 }, { "epoch": 0.41616149128655255, "grad_norm": 1.8136719465255737, "learning_rate": 6.57356335818491e-06, "loss": 0.8232, "step": 11782 }, { "epoch": 0.41619681309026046, "grad_norm": 1.6803056001663208, "learning_rate": 6.5730204035565235e-06, "loss": 0.803, "step": 11783 }, { "epoch": 0.4162321348939684, "grad_norm": 1.609086275100708, "learning_rate": 6.572477428340742e-06, "loss": 0.8126, "step": 11784 }, { "epoch": 0.4162674566976763, "grad_norm": 1.6809077262878418, "learning_rate": 6.571934432544672e-06, "loss": 0.8161, "step": 11785 }, { "epoch": 0.4163027785013842, "grad_norm": 1.6151542663574219, "learning_rate": 6.571391416175418e-06, "loss": 0.8088, "step": 11786 }, { "epoch": 0.4163381003050921, "grad_norm": 1.548306941986084, "learning_rate": 6.57084837924009e-06, "loss": 0.7788, "step": 11787 }, { "epoch": 0.41637342210879996, "grad_norm": 1.5414931774139404, "learning_rate": 6.570305321745794e-06, "loss": 0.8454, "step": 11788 }, { "epoch": 0.4164087439125079, "grad_norm": 1.5439475774765015, "learning_rate": 6.569762243699635e-06, "loss": 0.7951, "step": 11789 }, { "epoch": 0.4164440657162158, "grad_norm": 1.4758694171905518, "learning_rate": 6.569219145108725e-06, "loss": 0.8089, "step": 11790 }, { "epoch": 0.4164793875199237, "grad_norm": 1.6913400888442993, "learning_rate": 6.56867602598017e-06, "loss": 0.8407, "step": 11791 }, { "epoch": 0.4165147093236316, "grad_norm": 1.6543172597885132, "learning_rate": 6.568132886321077e-06, "loss": 0.7888, "step": 11792 }, { "epoch": 0.4165500311273395, "grad_norm": 1.7510944604873657, "learning_rate": 6.567589726138556e-06, "loss": 0.8338, "step": 11793 }, { "epoch": 0.4165853529310474, "grad_norm": 1.6883409023284912, "learning_rate": 6.567046545439714e-06, "loss": 0.8172, "step": 11794 }, { "epoch": 0.41662067473475534, "grad_norm": 1.7943633794784546, "learning_rate": 6.566503344231663e-06, "loss": 0.8309, "step": 11795 }, { "epoch": 0.41665599653846325, "grad_norm": 1.5855352878570557, "learning_rate": 6.565960122521508e-06, "loss": 0.8216, "step": 11796 }, { "epoch": 0.41669131834217116, "grad_norm": 1.7354958057403564, "learning_rate": 6.565416880316365e-06, "loss": 0.8335, "step": 11797 }, { "epoch": 0.41672664014587907, "grad_norm": 1.8077811002731323, "learning_rate": 6.56487361762334e-06, "loss": 0.8358, "step": 11798 }, { "epoch": 0.416761961949587, "grad_norm": 1.576043963432312, "learning_rate": 6.56433033444954e-06, "loss": 0.8056, "step": 11799 }, { "epoch": 0.4167972837532949, "grad_norm": 1.7972098588943481, "learning_rate": 6.563787030802081e-06, "loss": 0.8207, "step": 11800 }, { "epoch": 0.41683260555700274, "grad_norm": 2.3386547565460205, "learning_rate": 6.5632437066880695e-06, "loss": 0.7703, "step": 11801 }, { "epoch": 0.41686792736071066, "grad_norm": 1.8324285745620728, "learning_rate": 6.56270036211462e-06, "loss": 0.8142, "step": 11802 }, { "epoch": 0.41690324916441857, "grad_norm": 1.7969281673431396, "learning_rate": 6.562156997088842e-06, "loss": 0.7996, "step": 11803 }, { "epoch": 0.4169385709681265, "grad_norm": 1.8038362264633179, "learning_rate": 6.561613611617846e-06, "loss": 0.821, "step": 11804 }, { "epoch": 0.4169738927718344, "grad_norm": 1.6797852516174316, "learning_rate": 6.561070205708744e-06, "loss": 0.7796, "step": 11805 }, { "epoch": 0.4170092145755423, "grad_norm": 1.7499606609344482, "learning_rate": 6.56052677936865e-06, "loss": 0.796, "step": 11806 }, { "epoch": 0.4170445363792502, "grad_norm": 1.628594994544983, "learning_rate": 6.559983332604675e-06, "loss": 0.7843, "step": 11807 }, { "epoch": 0.4170798581829581, "grad_norm": 1.652829647064209, "learning_rate": 6.559439865423931e-06, "loss": 0.798, "step": 11808 }, { "epoch": 0.41711517998666603, "grad_norm": 1.5792723894119263, "learning_rate": 6.55889637783353e-06, "loss": 0.7988, "step": 11809 }, { "epoch": 0.41715050179037394, "grad_norm": 1.5310840606689453, "learning_rate": 6.558352869840588e-06, "loss": 0.7857, "step": 11810 }, { "epoch": 0.41718582359408185, "grad_norm": 1.7336604595184326, "learning_rate": 6.557809341452215e-06, "loss": 0.7735, "step": 11811 }, { "epoch": 0.41722114539778976, "grad_norm": 1.6069042682647705, "learning_rate": 6.557265792675527e-06, "loss": 0.7886, "step": 11812 }, { "epoch": 0.41725646720149767, "grad_norm": 1.8033134937286377, "learning_rate": 6.556722223517636e-06, "loss": 0.7957, "step": 11813 }, { "epoch": 0.4172917890052055, "grad_norm": 1.6975390911102295, "learning_rate": 6.556178633985659e-06, "loss": 0.8267, "step": 11814 }, { "epoch": 0.41732711080891344, "grad_norm": 1.6475294828414917, "learning_rate": 6.555635024086706e-06, "loss": 0.8266, "step": 11815 }, { "epoch": 0.41736243261262135, "grad_norm": 1.5437324047088623, "learning_rate": 6.555091393827898e-06, "loss": 0.7989, "step": 11816 }, { "epoch": 0.41739775441632926, "grad_norm": 1.099657654762268, "learning_rate": 6.554547743216343e-06, "loss": 0.5844, "step": 11817 }, { "epoch": 0.41743307622003717, "grad_norm": 10.096535682678223, "learning_rate": 6.554004072259161e-06, "loss": 0.8011, "step": 11818 }, { "epoch": 0.4174683980237451, "grad_norm": 1.5404142141342163, "learning_rate": 6.553460380963466e-06, "loss": 0.7287, "step": 11819 }, { "epoch": 0.417503719827453, "grad_norm": 1.9183183908462524, "learning_rate": 6.552916669336371e-06, "loss": 0.8235, "step": 11820 }, { "epoch": 0.4175390416311609, "grad_norm": 1.733284592628479, "learning_rate": 6.5523729373849965e-06, "loss": 0.8286, "step": 11821 }, { "epoch": 0.4175743634348688, "grad_norm": 1.7456196546554565, "learning_rate": 6.551829185116457e-06, "loss": 0.801, "step": 11822 }, { "epoch": 0.4176096852385767, "grad_norm": 2.1403093338012695, "learning_rate": 6.551285412537868e-06, "loss": 0.8141, "step": 11823 }, { "epoch": 0.41764500704228463, "grad_norm": 1.5408791303634644, "learning_rate": 6.550741619656348e-06, "loss": 0.8109, "step": 11824 }, { "epoch": 0.41768032884599254, "grad_norm": 1.9504073858261108, "learning_rate": 6.550197806479013e-06, "loss": 0.8291, "step": 11825 }, { "epoch": 0.41771565064970045, "grad_norm": 1.7258116006851196, "learning_rate": 6.549653973012981e-06, "loss": 0.7986, "step": 11826 }, { "epoch": 0.4177509724534083, "grad_norm": 1.7487767934799194, "learning_rate": 6.549110119265368e-06, "loss": 0.8269, "step": 11827 }, { "epoch": 0.4177862942571162, "grad_norm": 1.5736554861068726, "learning_rate": 6.548566245243293e-06, "loss": 0.8185, "step": 11828 }, { "epoch": 0.41782161606082413, "grad_norm": 1.5823113918304443, "learning_rate": 6.548022350953874e-06, "loss": 0.79, "step": 11829 }, { "epoch": 0.41785693786453204, "grad_norm": 1.8005321025848389, "learning_rate": 6.54747843640423e-06, "loss": 0.8453, "step": 11830 }, { "epoch": 0.41789225966823995, "grad_norm": 1.830110788345337, "learning_rate": 6.5469345016014786e-06, "loss": 0.8428, "step": 11831 }, { "epoch": 0.41792758147194786, "grad_norm": 1.8493843078613281, "learning_rate": 6.5463905465527414e-06, "loss": 0.8265, "step": 11832 }, { "epoch": 0.4179629032756558, "grad_norm": 1.523544430732727, "learning_rate": 6.545846571265132e-06, "loss": 0.8053, "step": 11833 }, { "epoch": 0.4179982250793637, "grad_norm": 1.6874653100967407, "learning_rate": 6.545302575745776e-06, "loss": 0.7739, "step": 11834 }, { "epoch": 0.4180335468830716, "grad_norm": 1.7841328382492065, "learning_rate": 6.544758560001789e-06, "loss": 0.7875, "step": 11835 }, { "epoch": 0.4180688686867795, "grad_norm": 1.6068347692489624, "learning_rate": 6.544214524040293e-06, "loss": 0.8477, "step": 11836 }, { "epoch": 0.4181041904904874, "grad_norm": 1.7181109189987183, "learning_rate": 6.543670467868407e-06, "loss": 0.816, "step": 11837 }, { "epoch": 0.4181395122941953, "grad_norm": 1.6005747318267822, "learning_rate": 6.543126391493252e-06, "loss": 0.7774, "step": 11838 }, { "epoch": 0.41817483409790324, "grad_norm": 1.6179969310760498, "learning_rate": 6.54258229492195e-06, "loss": 0.8066, "step": 11839 }, { "epoch": 0.4182101559016111, "grad_norm": 0.9776912927627563, "learning_rate": 6.54203817816162e-06, "loss": 0.5726, "step": 11840 }, { "epoch": 0.418245477705319, "grad_norm": 1.7003329992294312, "learning_rate": 6.541494041219383e-06, "loss": 0.8272, "step": 11841 }, { "epoch": 0.4182807995090269, "grad_norm": 1.687215805053711, "learning_rate": 6.5409498841023635e-06, "loss": 0.8304, "step": 11842 }, { "epoch": 0.4183161213127348, "grad_norm": 1.643612027168274, "learning_rate": 6.540405706817682e-06, "loss": 0.8139, "step": 11843 }, { "epoch": 0.41835144311644273, "grad_norm": 1.6886303424835205, "learning_rate": 6.53986150937246e-06, "loss": 0.8285, "step": 11844 }, { "epoch": 0.41838676492015064, "grad_norm": 1.041192650794983, "learning_rate": 6.539317291773819e-06, "loss": 0.599, "step": 11845 }, { "epoch": 0.41842208672385856, "grad_norm": 1.7501550912857056, "learning_rate": 6.538773054028882e-06, "loss": 0.8617, "step": 11846 }, { "epoch": 0.41845740852756647, "grad_norm": 1.6501365900039673, "learning_rate": 6.538228796144774e-06, "loss": 0.8177, "step": 11847 }, { "epoch": 0.4184927303312744, "grad_norm": 1.7924760580062866, "learning_rate": 6.537684518128616e-06, "loss": 0.8122, "step": 11848 }, { "epoch": 0.4185280521349823, "grad_norm": 1.4391002655029297, "learning_rate": 6.537140219987532e-06, "loss": 0.7482, "step": 11849 }, { "epoch": 0.4185633739386902, "grad_norm": 1.5607901811599731, "learning_rate": 6.536595901728646e-06, "loss": 0.7729, "step": 11850 }, { "epoch": 0.4185986957423981, "grad_norm": 1.7054738998413086, "learning_rate": 6.536051563359082e-06, "loss": 0.8465, "step": 11851 }, { "epoch": 0.418634017546106, "grad_norm": 1.7292336225509644, "learning_rate": 6.535507204885965e-06, "loss": 0.8317, "step": 11852 }, { "epoch": 0.4186693393498139, "grad_norm": 1.9710116386413574, "learning_rate": 6.5349628263164165e-06, "loss": 0.8414, "step": 11853 }, { "epoch": 0.4187046611535218, "grad_norm": 1.788771152496338, "learning_rate": 6.534418427657563e-06, "loss": 0.7964, "step": 11854 }, { "epoch": 0.4187399829572297, "grad_norm": 1.860124945640564, "learning_rate": 6.5338740089165294e-06, "loss": 0.8152, "step": 11855 }, { "epoch": 0.4187753047609376, "grad_norm": 1.9007132053375244, "learning_rate": 6.533329570100443e-06, "loss": 0.844, "step": 11856 }, { "epoch": 0.4188106265646455, "grad_norm": 1.6934752464294434, "learning_rate": 6.532785111216426e-06, "loss": 0.7813, "step": 11857 }, { "epoch": 0.4188459483683534, "grad_norm": 1.559054970741272, "learning_rate": 6.532240632271604e-06, "loss": 0.7886, "step": 11858 }, { "epoch": 0.41888127017206134, "grad_norm": 1.8577286005020142, "learning_rate": 6.531696133273108e-06, "loss": 0.7819, "step": 11859 }, { "epoch": 0.41891659197576925, "grad_norm": 1.7574654817581177, "learning_rate": 6.531151614228059e-06, "loss": 0.8384, "step": 11860 }, { "epoch": 0.41895191377947716, "grad_norm": 2.0308752059936523, "learning_rate": 6.530607075143586e-06, "loss": 0.8492, "step": 11861 }, { "epoch": 0.41898723558318507, "grad_norm": 1.6810131072998047, "learning_rate": 6.530062516026815e-06, "loss": 0.8014, "step": 11862 }, { "epoch": 0.419022557386893, "grad_norm": 1.683520793914795, "learning_rate": 6.529517936884874e-06, "loss": 0.7993, "step": 11863 }, { "epoch": 0.4190578791906009, "grad_norm": 1.6796023845672607, "learning_rate": 6.52897333772489e-06, "loss": 0.8149, "step": 11864 }, { "epoch": 0.4190932009943088, "grad_norm": 1.5941911935806274, "learning_rate": 6.528428718553989e-06, "loss": 0.8207, "step": 11865 }, { "epoch": 0.41912852279801666, "grad_norm": 1.631784200668335, "learning_rate": 6.527884079379301e-06, "loss": 0.8243, "step": 11866 }, { "epoch": 0.41916384460172457, "grad_norm": 1.5805693864822388, "learning_rate": 6.527339420207952e-06, "loss": 0.7961, "step": 11867 }, { "epoch": 0.4191991664054325, "grad_norm": 1.6655954122543335, "learning_rate": 6.526794741047073e-06, "loss": 0.7929, "step": 11868 }, { "epoch": 0.4192344882091404, "grad_norm": 1.5654959678649902, "learning_rate": 6.5262500419037935e-06, "loss": 0.7854, "step": 11869 }, { "epoch": 0.4192698100128483, "grad_norm": 1.5929372310638428, "learning_rate": 6.525705322785237e-06, "loss": 0.7785, "step": 11870 }, { "epoch": 0.4193051318165562, "grad_norm": 1.828216314315796, "learning_rate": 6.525160583698538e-06, "loss": 0.767, "step": 11871 }, { "epoch": 0.4193404536202641, "grad_norm": 1.9100111722946167, "learning_rate": 6.524615824650824e-06, "loss": 0.8294, "step": 11872 }, { "epoch": 0.41937577542397203, "grad_norm": 1.7065438032150269, "learning_rate": 6.524071045649224e-06, "loss": 0.8273, "step": 11873 }, { "epoch": 0.41941109722767994, "grad_norm": 2.2023427486419678, "learning_rate": 6.523526246700869e-06, "loss": 0.7798, "step": 11874 }, { "epoch": 0.41944641903138785, "grad_norm": 1.7798470258712769, "learning_rate": 6.522981427812888e-06, "loss": 0.8426, "step": 11875 }, { "epoch": 0.41948174083509576, "grad_norm": 1.6777900457382202, "learning_rate": 6.522436588992413e-06, "loss": 0.7726, "step": 11876 }, { "epoch": 0.4195170626388037, "grad_norm": 1.6990281343460083, "learning_rate": 6.521891730246574e-06, "loss": 0.7953, "step": 11877 }, { "epoch": 0.4195523844425116, "grad_norm": 1.5541250705718994, "learning_rate": 6.5213468515825015e-06, "loss": 0.8064, "step": 11878 }, { "epoch": 0.41958770624621944, "grad_norm": 1.6614960432052612, "learning_rate": 6.5208019530073295e-06, "loss": 0.8149, "step": 11879 }, { "epoch": 0.41962302804992735, "grad_norm": 1.6059105396270752, "learning_rate": 6.520257034528186e-06, "loss": 0.8237, "step": 11880 }, { "epoch": 0.41965834985363526, "grad_norm": 1.9267024993896484, "learning_rate": 6.5197120961522045e-06, "loss": 0.8273, "step": 11881 }, { "epoch": 0.41969367165734317, "grad_norm": 1.567952275276184, "learning_rate": 6.519167137886517e-06, "loss": 0.7918, "step": 11882 }, { "epoch": 0.4197289934610511, "grad_norm": 1.7105282545089722, "learning_rate": 6.518622159738254e-06, "loss": 0.8459, "step": 11883 }, { "epoch": 0.419764315264759, "grad_norm": 1.7936224937438965, "learning_rate": 6.518077161714551e-06, "loss": 0.8008, "step": 11884 }, { "epoch": 0.4197996370684669, "grad_norm": 2.020386219024658, "learning_rate": 6.517532143822539e-06, "loss": 0.8024, "step": 11885 }, { "epoch": 0.4198349588721748, "grad_norm": 1.639183759689331, "learning_rate": 6.516987106069351e-06, "loss": 0.7861, "step": 11886 }, { "epoch": 0.4198702806758827, "grad_norm": 1.708672046661377, "learning_rate": 6.516442048462123e-06, "loss": 0.8252, "step": 11887 }, { "epoch": 0.41990560247959063, "grad_norm": 1.6808723211288452, "learning_rate": 6.515896971007985e-06, "loss": 0.7641, "step": 11888 }, { "epoch": 0.41994092428329854, "grad_norm": 1.7202317714691162, "learning_rate": 6.515351873714073e-06, "loss": 0.793, "step": 11889 }, { "epoch": 0.41997624608700646, "grad_norm": 1.9237430095672607, "learning_rate": 6.514806756587521e-06, "loss": 0.8492, "step": 11890 }, { "epoch": 0.42001156789071437, "grad_norm": 1.69086754322052, "learning_rate": 6.5142616196354605e-06, "loss": 0.7981, "step": 11891 }, { "epoch": 0.4200468896944222, "grad_norm": 1.4775675535202026, "learning_rate": 6.5137164628650305e-06, "loss": 0.804, "step": 11892 }, { "epoch": 0.42008221149813013, "grad_norm": 1.624191403388977, "learning_rate": 6.513171286283364e-06, "loss": 0.7952, "step": 11893 }, { "epoch": 0.42011753330183804, "grad_norm": 1.8044531345367432, "learning_rate": 6.512626089897596e-06, "loss": 0.8166, "step": 11894 }, { "epoch": 0.42015285510554595, "grad_norm": 1.6651368141174316, "learning_rate": 6.512080873714861e-06, "loss": 0.8287, "step": 11895 }, { "epoch": 0.42018817690925386, "grad_norm": 1.5962756872177124, "learning_rate": 6.511535637742296e-06, "loss": 0.8025, "step": 11896 }, { "epoch": 0.4202234987129618, "grad_norm": 1.5730717182159424, "learning_rate": 6.510990381987038e-06, "loss": 0.8183, "step": 11897 }, { "epoch": 0.4202588205166697, "grad_norm": 1.7060836553573608, "learning_rate": 6.510445106456222e-06, "loss": 0.7788, "step": 11898 }, { "epoch": 0.4202941423203776, "grad_norm": 1.5772005319595337, "learning_rate": 6.5098998111569836e-06, "loss": 0.8024, "step": 11899 }, { "epoch": 0.4203294641240855, "grad_norm": 1.5278266668319702, "learning_rate": 6.509354496096461e-06, "loss": 0.7701, "step": 11900 }, { "epoch": 0.4203647859277934, "grad_norm": 1.6468701362609863, "learning_rate": 6.508809161281788e-06, "loss": 0.7868, "step": 11901 }, { "epoch": 0.4204001077315013, "grad_norm": 1.5803622007369995, "learning_rate": 6.508263806720106e-06, "loss": 0.8023, "step": 11902 }, { "epoch": 0.42043542953520924, "grad_norm": 1.7889783382415771, "learning_rate": 6.5077184324185515e-06, "loss": 0.779, "step": 11903 }, { "epoch": 0.42047075133891715, "grad_norm": 1.762203574180603, "learning_rate": 6.50717303838426e-06, "loss": 0.8361, "step": 11904 }, { "epoch": 0.420506073142625, "grad_norm": 1.671671748161316, "learning_rate": 6.506627624624373e-06, "loss": 0.7877, "step": 11905 }, { "epoch": 0.4205413949463329, "grad_norm": 1.6477501392364502, "learning_rate": 6.506082191146026e-06, "loss": 0.7795, "step": 11906 }, { "epoch": 0.4205767167500408, "grad_norm": 1.608505368232727, "learning_rate": 6.505536737956358e-06, "loss": 0.7967, "step": 11907 }, { "epoch": 0.42061203855374873, "grad_norm": 1.5699875354766846, "learning_rate": 6.5049912650625106e-06, "loss": 0.8059, "step": 11908 }, { "epoch": 0.42064736035745665, "grad_norm": 1.5371710062026978, "learning_rate": 6.504445772471617e-06, "loss": 0.7971, "step": 11909 }, { "epoch": 0.42068268216116456, "grad_norm": 1.6190671920776367, "learning_rate": 6.5039002601908215e-06, "loss": 0.8185, "step": 11910 }, { "epoch": 0.42071800396487247, "grad_norm": 1.5964394807815552, "learning_rate": 6.503354728227263e-06, "loss": 0.8036, "step": 11911 }, { "epoch": 0.4207533257685804, "grad_norm": 1.5332353115081787, "learning_rate": 6.502809176588077e-06, "loss": 0.7973, "step": 11912 }, { "epoch": 0.4207886475722883, "grad_norm": 1.8037306070327759, "learning_rate": 6.50226360528041e-06, "loss": 0.8257, "step": 11913 }, { "epoch": 0.4208239693759962, "grad_norm": 1.5754154920578003, "learning_rate": 6.5017180143114e-06, "loss": 0.7756, "step": 11914 }, { "epoch": 0.4208592911797041, "grad_norm": 1.6513397693634033, "learning_rate": 6.501172403688184e-06, "loss": 0.8255, "step": 11915 }, { "epoch": 0.420894612983412, "grad_norm": 1.6136000156402588, "learning_rate": 6.500626773417909e-06, "loss": 0.8226, "step": 11916 }, { "epoch": 0.42092993478711993, "grad_norm": 1.6781306266784668, "learning_rate": 6.500081123507711e-06, "loss": 0.845, "step": 11917 }, { "epoch": 0.4209652565908278, "grad_norm": 1.6310995817184448, "learning_rate": 6.499535453964734e-06, "loss": 0.8118, "step": 11918 }, { "epoch": 0.4210005783945357, "grad_norm": 1.6444475650787354, "learning_rate": 6.4989897647961175e-06, "loss": 0.8235, "step": 11919 }, { "epoch": 0.4210359001982436, "grad_norm": 1.6483443975448608, "learning_rate": 6.498444056009006e-06, "loss": 0.8096, "step": 11920 }, { "epoch": 0.4210712220019515, "grad_norm": 0.9379848837852478, "learning_rate": 6.497898327610539e-06, "loss": 0.6022, "step": 11921 }, { "epoch": 0.42110654380565943, "grad_norm": 2.082904577255249, "learning_rate": 6.497352579607862e-06, "loss": 0.8397, "step": 11922 }, { "epoch": 0.42114186560936734, "grad_norm": 1.7380993366241455, "learning_rate": 6.496806812008114e-06, "loss": 0.806, "step": 11923 }, { "epoch": 0.42117718741307525, "grad_norm": 1.7170103788375854, "learning_rate": 6.496261024818441e-06, "loss": 0.853, "step": 11924 }, { "epoch": 0.42121250921678316, "grad_norm": 1.7382926940917969, "learning_rate": 6.495715218045985e-06, "loss": 0.7696, "step": 11925 }, { "epoch": 0.42124783102049107, "grad_norm": 1.7450381517410278, "learning_rate": 6.49516939169789e-06, "loss": 0.8302, "step": 11926 }, { "epoch": 0.421283152824199, "grad_norm": 2.2056472301483154, "learning_rate": 6.494623545781299e-06, "loss": 0.8188, "step": 11927 }, { "epoch": 0.4213184746279069, "grad_norm": 2.057236671447754, "learning_rate": 6.494077680303354e-06, "loss": 0.7964, "step": 11928 }, { "epoch": 0.4213537964316148, "grad_norm": 1.7444666624069214, "learning_rate": 6.493531795271203e-06, "loss": 0.8019, "step": 11929 }, { "epoch": 0.4213891182353227, "grad_norm": 1.4878170490264893, "learning_rate": 6.4929858906919876e-06, "loss": 0.8366, "step": 11930 }, { "epoch": 0.42142444003903057, "grad_norm": 0.96390300989151, "learning_rate": 6.4924399665728545e-06, "loss": 0.5726, "step": 11931 }, { "epoch": 0.4214597618427385, "grad_norm": 1.6897468566894531, "learning_rate": 6.491894022920947e-06, "loss": 0.7837, "step": 11932 }, { "epoch": 0.4214950836464464, "grad_norm": 1.7485140562057495, "learning_rate": 6.491348059743411e-06, "loss": 0.8123, "step": 11933 }, { "epoch": 0.4215304054501543, "grad_norm": 1.6345247030258179, "learning_rate": 6.490802077047392e-06, "loss": 0.8203, "step": 11934 }, { "epoch": 0.4215657272538622, "grad_norm": 1.6787675619125366, "learning_rate": 6.490256074840036e-06, "loss": 0.7905, "step": 11935 }, { "epoch": 0.4216010490575701, "grad_norm": 0.9797080755233765, "learning_rate": 6.489710053128489e-06, "loss": 0.5987, "step": 11936 }, { "epoch": 0.42163637086127803, "grad_norm": 1.6765238046646118, "learning_rate": 6.489164011919896e-06, "loss": 0.8261, "step": 11937 }, { "epoch": 0.42167169266498594, "grad_norm": 1.5678609609603882, "learning_rate": 6.488617951221404e-06, "loss": 0.8281, "step": 11938 }, { "epoch": 0.42170701446869385, "grad_norm": 1.7223742008209229, "learning_rate": 6.488071871040161e-06, "loss": 0.8035, "step": 11939 }, { "epoch": 0.42174233627240176, "grad_norm": 1.817221999168396, "learning_rate": 6.487525771383314e-06, "loss": 0.8396, "step": 11940 }, { "epoch": 0.4217776580761097, "grad_norm": 1.6608664989471436, "learning_rate": 6.4869796522580064e-06, "loss": 0.8552, "step": 11941 }, { "epoch": 0.4218129798798176, "grad_norm": 1.6957989931106567, "learning_rate": 6.486433513671391e-06, "loss": 0.8288, "step": 11942 }, { "epoch": 0.4218483016835255, "grad_norm": 1.5713824033737183, "learning_rate": 6.4858873556306135e-06, "loss": 0.7959, "step": 11943 }, { "epoch": 0.42188362348723335, "grad_norm": 1.783027172088623, "learning_rate": 6.485341178142819e-06, "loss": 0.7893, "step": 11944 }, { "epoch": 0.42191894529094126, "grad_norm": 1.788483738899231, "learning_rate": 6.484794981215161e-06, "loss": 0.8254, "step": 11945 }, { "epoch": 0.42195426709464917, "grad_norm": 1.6143282651901245, "learning_rate": 6.484248764854783e-06, "loss": 0.8094, "step": 11946 }, { "epoch": 0.4219895888983571, "grad_norm": 1.6230063438415527, "learning_rate": 6.48370252906884e-06, "loss": 0.8198, "step": 11947 }, { "epoch": 0.422024910702065, "grad_norm": 1.606465220451355, "learning_rate": 6.4831562738644735e-06, "loss": 0.8137, "step": 11948 }, { "epoch": 0.4220602325057729, "grad_norm": 1.6512086391448975, "learning_rate": 6.482609999248838e-06, "loss": 0.8196, "step": 11949 }, { "epoch": 0.4220955543094808, "grad_norm": 1.7333663702011108, "learning_rate": 6.482063705229081e-06, "loss": 0.7953, "step": 11950 }, { "epoch": 0.4221308761131887, "grad_norm": 1.6491621732711792, "learning_rate": 6.481517391812353e-06, "loss": 0.8258, "step": 11951 }, { "epoch": 0.42216619791689663, "grad_norm": 1.801382303237915, "learning_rate": 6.480971059005804e-06, "loss": 0.7863, "step": 11952 }, { "epoch": 0.42220151972060455, "grad_norm": 1.5800845623016357, "learning_rate": 6.480424706816584e-06, "loss": 0.828, "step": 11953 }, { "epoch": 0.42223684152431246, "grad_norm": 1.88640558719635, "learning_rate": 6.479878335251844e-06, "loss": 0.7981, "step": 11954 }, { "epoch": 0.42227216332802037, "grad_norm": 1.532357096672058, "learning_rate": 6.479331944318735e-06, "loss": 0.77, "step": 11955 }, { "epoch": 0.4223074851317283, "grad_norm": 1.5204890966415405, "learning_rate": 6.478785534024408e-06, "loss": 0.7694, "step": 11956 }, { "epoch": 0.42234280693543613, "grad_norm": 1.6638866662979126, "learning_rate": 6.478239104376012e-06, "loss": 0.8223, "step": 11957 }, { "epoch": 0.42237812873914404, "grad_norm": 1.8607014417648315, "learning_rate": 6.4776926553807015e-06, "loss": 0.8074, "step": 11958 }, { "epoch": 0.42241345054285195, "grad_norm": 1.7015666961669922, "learning_rate": 6.477146187045626e-06, "loss": 0.8029, "step": 11959 }, { "epoch": 0.42244877234655986, "grad_norm": 1.4566731452941895, "learning_rate": 6.476599699377941e-06, "loss": 0.8059, "step": 11960 }, { "epoch": 0.4224840941502678, "grad_norm": 1.6546348333358765, "learning_rate": 6.476053192384795e-06, "loss": 0.8137, "step": 11961 }, { "epoch": 0.4225194159539757, "grad_norm": 1.82146155834198, "learning_rate": 6.475506666073343e-06, "loss": 0.8061, "step": 11962 }, { "epoch": 0.4225547377576836, "grad_norm": 1.5663758516311646, "learning_rate": 6.474960120450739e-06, "loss": 0.7542, "step": 11963 }, { "epoch": 0.4225900595613915, "grad_norm": 1.8184523582458496, "learning_rate": 6.474413555524132e-06, "loss": 0.8088, "step": 11964 }, { "epoch": 0.4226253813650994, "grad_norm": 1.567708969116211, "learning_rate": 6.473866971300678e-06, "loss": 0.816, "step": 11965 }, { "epoch": 0.42266070316880733, "grad_norm": 1.8275004625320435, "learning_rate": 6.47332036778753e-06, "loss": 0.7992, "step": 11966 }, { "epoch": 0.42269602497251524, "grad_norm": 1.6801142692565918, "learning_rate": 6.472773744991841e-06, "loss": 0.8191, "step": 11967 }, { "epoch": 0.42273134677622315, "grad_norm": 1.5022802352905273, "learning_rate": 6.472227102920767e-06, "loss": 0.813, "step": 11968 }, { "epoch": 0.42276666857993106, "grad_norm": 1.6121666431427002, "learning_rate": 6.471680441581463e-06, "loss": 0.7975, "step": 11969 }, { "epoch": 0.4228019903836389, "grad_norm": 1.7563635110855103, "learning_rate": 6.4711337609810795e-06, "loss": 0.8207, "step": 11970 }, { "epoch": 0.4228373121873468, "grad_norm": 1.5694313049316406, "learning_rate": 6.470587061126775e-06, "loss": 0.8625, "step": 11971 }, { "epoch": 0.42287263399105474, "grad_norm": 1.6681524515151978, "learning_rate": 6.470040342025704e-06, "loss": 0.7861, "step": 11972 }, { "epoch": 0.42290795579476265, "grad_norm": 1.6628310680389404, "learning_rate": 6.469493603685021e-06, "loss": 0.8187, "step": 11973 }, { "epoch": 0.42294327759847056, "grad_norm": 2.1785647869110107, "learning_rate": 6.468946846111882e-06, "loss": 0.8049, "step": 11974 }, { "epoch": 0.42297859940217847, "grad_norm": 1.7719444036483765, "learning_rate": 6.468400069313442e-06, "loss": 0.8184, "step": 11975 }, { "epoch": 0.4230139212058864, "grad_norm": 1.5122017860412598, "learning_rate": 6.467853273296859e-06, "loss": 0.7781, "step": 11976 }, { "epoch": 0.4230492430095943, "grad_norm": 1.5764487981796265, "learning_rate": 6.4673064580692875e-06, "loss": 0.8243, "step": 11977 }, { "epoch": 0.4230845648133022, "grad_norm": 1.000326156616211, "learning_rate": 6.466759623637885e-06, "loss": 0.5802, "step": 11978 }, { "epoch": 0.4231198866170101, "grad_norm": 1.7644914388656616, "learning_rate": 6.466212770009809e-06, "loss": 0.7869, "step": 11979 }, { "epoch": 0.423155208420718, "grad_norm": 1.707902193069458, "learning_rate": 6.465665897192215e-06, "loss": 0.8138, "step": 11980 }, { "epoch": 0.42319053022442593, "grad_norm": 1.7100253105163574, "learning_rate": 6.465119005192261e-06, "loss": 0.8046, "step": 11981 }, { "epoch": 0.42322585202813384, "grad_norm": 1.5862761735916138, "learning_rate": 6.464572094017107e-06, "loss": 0.8167, "step": 11982 }, { "epoch": 0.4232611738318417, "grad_norm": 1.61016047000885, "learning_rate": 6.464025163673906e-06, "loss": 0.8138, "step": 11983 }, { "epoch": 0.4232964956355496, "grad_norm": 2.224346160888672, "learning_rate": 6.463478214169821e-06, "loss": 0.7941, "step": 11984 }, { "epoch": 0.4233318174392575, "grad_norm": 1.7305675745010376, "learning_rate": 6.462931245512008e-06, "loss": 0.7932, "step": 11985 }, { "epoch": 0.42336713924296543, "grad_norm": 1.5657336711883545, "learning_rate": 6.462384257707623e-06, "loss": 0.7856, "step": 11986 }, { "epoch": 0.42340246104667334, "grad_norm": 1.509117841720581, "learning_rate": 6.4618372507638305e-06, "loss": 0.8099, "step": 11987 }, { "epoch": 0.42343778285038125, "grad_norm": 1.6148289442062378, "learning_rate": 6.461290224687786e-06, "loss": 0.7946, "step": 11988 }, { "epoch": 0.42347310465408916, "grad_norm": 1.664478063583374, "learning_rate": 6.460743179486651e-06, "loss": 0.8135, "step": 11989 }, { "epoch": 0.42350842645779707, "grad_norm": 1.6348062753677368, "learning_rate": 6.460196115167583e-06, "loss": 0.7961, "step": 11990 }, { "epoch": 0.423543748261505, "grad_norm": 2.013327121734619, "learning_rate": 6.4596490317377424e-06, "loss": 0.8147, "step": 11991 }, { "epoch": 0.4235790700652129, "grad_norm": 1.5473837852478027, "learning_rate": 6.45910192920429e-06, "loss": 0.7792, "step": 11992 }, { "epoch": 0.4236143918689208, "grad_norm": 2.1378300189971924, "learning_rate": 6.4585548075743844e-06, "loss": 0.8456, "step": 11993 }, { "epoch": 0.4236497136726287, "grad_norm": 1.784176230430603, "learning_rate": 6.458007666855189e-06, "loss": 0.8035, "step": 11994 }, { "epoch": 0.4236850354763366, "grad_norm": 1.5173808336257935, "learning_rate": 6.457460507053864e-06, "loss": 0.7823, "step": 11995 }, { "epoch": 0.4237203572800445, "grad_norm": 1.597029685974121, "learning_rate": 6.456913328177569e-06, "loss": 0.7825, "step": 11996 }, { "epoch": 0.4237556790837524, "grad_norm": 1.6640669107437134, "learning_rate": 6.4563661302334655e-06, "loss": 0.8232, "step": 11997 }, { "epoch": 0.4237910008874603, "grad_norm": 1.5541731119155884, "learning_rate": 6.455818913228717e-06, "loss": 0.7977, "step": 11998 }, { "epoch": 0.4238263226911682, "grad_norm": 1.6681592464447021, "learning_rate": 6.4552716771704835e-06, "loss": 0.7773, "step": 11999 }, { "epoch": 0.4238616444948761, "grad_norm": 1.773486614227295, "learning_rate": 6.45472442206593e-06, "loss": 0.8047, "step": 12000 }, { "epoch": 0.42389696629858403, "grad_norm": 1.7420263290405273, "learning_rate": 6.454177147922214e-06, "loss": 0.8075, "step": 12001 }, { "epoch": 0.42393228810229194, "grad_norm": 1.7144302129745483, "learning_rate": 6.453629854746502e-06, "loss": 0.8165, "step": 12002 }, { "epoch": 0.42396760990599985, "grad_norm": 1.6577718257904053, "learning_rate": 6.4530825425459564e-06, "loss": 0.7893, "step": 12003 }, { "epoch": 0.42400293170970776, "grad_norm": 1.7904032468795776, "learning_rate": 6.452535211327737e-06, "loss": 0.8467, "step": 12004 }, { "epoch": 0.4240382535134157, "grad_norm": 2.1695873737335205, "learning_rate": 6.451987861099012e-06, "loss": 0.8114, "step": 12005 }, { "epoch": 0.4240735753171236, "grad_norm": 1.7630969285964966, "learning_rate": 6.451440491866943e-06, "loss": 0.8178, "step": 12006 }, { "epoch": 0.4241088971208315, "grad_norm": 1.550041675567627, "learning_rate": 6.450893103638692e-06, "loss": 0.7995, "step": 12007 }, { "epoch": 0.4241442189245394, "grad_norm": 1.604706048965454, "learning_rate": 6.450345696421427e-06, "loss": 0.7995, "step": 12008 }, { "epoch": 0.42417954072824726, "grad_norm": 1.7004040479660034, "learning_rate": 6.449798270222309e-06, "loss": 0.8223, "step": 12009 }, { "epoch": 0.4242148625319552, "grad_norm": 1.617387294769287, "learning_rate": 6.4492508250485045e-06, "loss": 0.831, "step": 12010 }, { "epoch": 0.4242501843356631, "grad_norm": 1.6662620306015015, "learning_rate": 6.448703360907177e-06, "loss": 0.7994, "step": 12011 }, { "epoch": 0.424285506139371, "grad_norm": 1.5173296928405762, "learning_rate": 6.4481558778054905e-06, "loss": 0.8168, "step": 12012 }, { "epoch": 0.4243208279430789, "grad_norm": 1.6576889753341675, "learning_rate": 6.447608375750615e-06, "loss": 0.8411, "step": 12013 }, { "epoch": 0.4243561497467868, "grad_norm": 1.537912130355835, "learning_rate": 6.4470608547497116e-06, "loss": 0.7863, "step": 12014 }, { "epoch": 0.4243914715504947, "grad_norm": 2.3794491291046143, "learning_rate": 6.446513314809948e-06, "loss": 0.7932, "step": 12015 }, { "epoch": 0.42442679335420264, "grad_norm": 1.6964458227157593, "learning_rate": 6.445965755938491e-06, "loss": 0.8408, "step": 12016 }, { "epoch": 0.42446211515791055, "grad_norm": 2.9207658767700195, "learning_rate": 6.445418178142504e-06, "loss": 0.8012, "step": 12017 }, { "epoch": 0.42449743696161846, "grad_norm": 1.719038724899292, "learning_rate": 6.444870581429157e-06, "loss": 0.8305, "step": 12018 }, { "epoch": 0.42453275876532637, "grad_norm": 1.6974079608917236, "learning_rate": 6.444322965805616e-06, "loss": 0.8292, "step": 12019 }, { "epoch": 0.4245680805690343, "grad_norm": 1.6462854146957397, "learning_rate": 6.443775331279046e-06, "loss": 0.8089, "step": 12020 }, { "epoch": 0.4246034023727422, "grad_norm": 1.4899322986602783, "learning_rate": 6.443227677856617e-06, "loss": 0.7866, "step": 12021 }, { "epoch": 0.42463872417645004, "grad_norm": 1.591206669807434, "learning_rate": 6.442680005545494e-06, "loss": 0.8302, "step": 12022 }, { "epoch": 0.42467404598015795, "grad_norm": 1.707338571548462, "learning_rate": 6.442132314352848e-06, "loss": 0.7986, "step": 12023 }, { "epoch": 0.42470936778386587, "grad_norm": 1.7118678092956543, "learning_rate": 6.441584604285846e-06, "loss": 0.8195, "step": 12024 }, { "epoch": 0.4247446895875738, "grad_norm": 1.6867281198501587, "learning_rate": 6.441036875351652e-06, "loss": 0.8015, "step": 12025 }, { "epoch": 0.4247800113912817, "grad_norm": 1.5018421411514282, "learning_rate": 6.440489127557441e-06, "loss": 0.8069, "step": 12026 }, { "epoch": 0.4248153331949896, "grad_norm": 1.6167633533477783, "learning_rate": 6.43994136091038e-06, "loss": 0.7988, "step": 12027 }, { "epoch": 0.4248506549986975, "grad_norm": 1.684503197669983, "learning_rate": 6.439393575417636e-06, "loss": 0.7919, "step": 12028 }, { "epoch": 0.4248859768024054, "grad_norm": 1.6332008838653564, "learning_rate": 6.43884577108638e-06, "loss": 0.7978, "step": 12029 }, { "epoch": 0.42492129860611333, "grad_norm": 1.5841869115829468, "learning_rate": 6.438297947923779e-06, "loss": 0.7968, "step": 12030 }, { "epoch": 0.42495662040982124, "grad_norm": 1.8712959289550781, "learning_rate": 6.437750105937006e-06, "loss": 0.7933, "step": 12031 }, { "epoch": 0.42499194221352915, "grad_norm": 1.630894660949707, "learning_rate": 6.43720224513323e-06, "loss": 0.8346, "step": 12032 }, { "epoch": 0.42502726401723706, "grad_norm": 1.6667604446411133, "learning_rate": 6.436654365519621e-06, "loss": 0.7987, "step": 12033 }, { "epoch": 0.42506258582094497, "grad_norm": 1.6503793001174927, "learning_rate": 6.43610646710335e-06, "loss": 0.7946, "step": 12034 }, { "epoch": 0.4250979076246528, "grad_norm": 1.7846935987472534, "learning_rate": 6.435558549891589e-06, "loss": 0.8021, "step": 12035 }, { "epoch": 0.42513322942836074, "grad_norm": 1.610466480255127, "learning_rate": 6.435010613891504e-06, "loss": 0.809, "step": 12036 }, { "epoch": 0.42516855123206865, "grad_norm": 1.6306686401367188, "learning_rate": 6.4344626591102745e-06, "loss": 0.8063, "step": 12037 }, { "epoch": 0.42520387303577656, "grad_norm": 1.5565712451934814, "learning_rate": 6.4339146855550625e-06, "loss": 0.8292, "step": 12038 }, { "epoch": 0.42523919483948447, "grad_norm": 1.608147382736206, "learning_rate": 6.433366693233047e-06, "loss": 0.8299, "step": 12039 }, { "epoch": 0.4252745166431924, "grad_norm": 1.5082800388336182, "learning_rate": 6.432818682151398e-06, "loss": 0.7894, "step": 12040 }, { "epoch": 0.4253098384469003, "grad_norm": 1.5920683145523071, "learning_rate": 6.432270652317288e-06, "loss": 0.8104, "step": 12041 }, { "epoch": 0.4253451602506082, "grad_norm": 1.5751280784606934, "learning_rate": 6.4317226037378865e-06, "loss": 0.8139, "step": 12042 }, { "epoch": 0.4253804820543161, "grad_norm": 1.7773561477661133, "learning_rate": 6.43117453642037e-06, "loss": 0.7907, "step": 12043 }, { "epoch": 0.425415803858024, "grad_norm": 1.695622205734253, "learning_rate": 6.430626450371911e-06, "loss": 0.8146, "step": 12044 }, { "epoch": 0.42545112566173193, "grad_norm": 1.6234428882598877, "learning_rate": 6.430078345599681e-06, "loss": 0.7986, "step": 12045 }, { "epoch": 0.42548644746543984, "grad_norm": 1.642459511756897, "learning_rate": 6.429530222110854e-06, "loss": 0.797, "step": 12046 }, { "epoch": 0.42552176926914775, "grad_norm": 1.7341750860214233, "learning_rate": 6.4289820799126065e-06, "loss": 0.7953, "step": 12047 }, { "epoch": 0.4255570910728556, "grad_norm": 1.715261459350586, "learning_rate": 6.428433919012108e-06, "loss": 0.8021, "step": 12048 }, { "epoch": 0.4255924128765635, "grad_norm": 1.699327826499939, "learning_rate": 6.427885739416535e-06, "loss": 0.822, "step": 12049 }, { "epoch": 0.42562773468027143, "grad_norm": 1.5959689617156982, "learning_rate": 6.427337541133063e-06, "loss": 0.7991, "step": 12050 }, { "epoch": 0.42566305648397934, "grad_norm": 1.66533625125885, "learning_rate": 6.426789324168864e-06, "loss": 0.8274, "step": 12051 }, { "epoch": 0.42569837828768725, "grad_norm": 1.4388024806976318, "learning_rate": 6.4262410885311154e-06, "loss": 0.7581, "step": 12052 }, { "epoch": 0.42573370009139516, "grad_norm": 1.5362390279769897, "learning_rate": 6.425692834226992e-06, "loss": 0.8171, "step": 12053 }, { "epoch": 0.4257690218951031, "grad_norm": 1.76920485496521, "learning_rate": 6.425144561263668e-06, "loss": 0.8466, "step": 12054 }, { "epoch": 0.425804343698811, "grad_norm": 1.6076505184173584, "learning_rate": 6.42459626964832e-06, "loss": 0.791, "step": 12055 }, { "epoch": 0.4258396655025189, "grad_norm": 1.6024585962295532, "learning_rate": 6.424047959388125e-06, "loss": 0.7951, "step": 12056 }, { "epoch": 0.4258749873062268, "grad_norm": 1.7396173477172852, "learning_rate": 6.423499630490256e-06, "loss": 0.7733, "step": 12057 }, { "epoch": 0.4259103091099347, "grad_norm": 1.6348555088043213, "learning_rate": 6.422951282961893e-06, "loss": 0.8302, "step": 12058 }, { "epoch": 0.4259456309136426, "grad_norm": 1.4249718189239502, "learning_rate": 6.4224029168102096e-06, "loss": 0.7994, "step": 12059 }, { "epoch": 0.42598095271735054, "grad_norm": 1.453626275062561, "learning_rate": 6.421854532042386e-06, "loss": 0.7899, "step": 12060 }, { "epoch": 0.4260162745210584, "grad_norm": 1.6780673265457153, "learning_rate": 6.421306128665597e-06, "loss": 0.7776, "step": 12061 }, { "epoch": 0.4260515963247663, "grad_norm": 1.5694173574447632, "learning_rate": 6.42075770668702e-06, "loss": 0.786, "step": 12062 }, { "epoch": 0.4260869181284742, "grad_norm": 1.4898632764816284, "learning_rate": 6.420209266113833e-06, "loss": 0.8335, "step": 12063 }, { "epoch": 0.4261222399321821, "grad_norm": 1.642958402633667, "learning_rate": 6.419660806953214e-06, "loss": 0.8194, "step": 12064 }, { "epoch": 0.42615756173589003, "grad_norm": 1.5978418588638306, "learning_rate": 6.419112329212343e-06, "loss": 0.8326, "step": 12065 }, { "epoch": 0.42619288353959794, "grad_norm": 1.5764597654342651, "learning_rate": 6.418563832898396e-06, "loss": 0.784, "step": 12066 }, { "epoch": 0.42622820534330585, "grad_norm": 1.6597545146942139, "learning_rate": 6.41801531801855e-06, "loss": 0.7964, "step": 12067 }, { "epoch": 0.42626352714701377, "grad_norm": 1.5637253522872925, "learning_rate": 6.417466784579989e-06, "loss": 0.7952, "step": 12068 }, { "epoch": 0.4262988489507217, "grad_norm": 1.5646915435791016, "learning_rate": 6.416918232589887e-06, "loss": 0.8443, "step": 12069 }, { "epoch": 0.4263341707544296, "grad_norm": 1.5461496114730835, "learning_rate": 6.416369662055426e-06, "loss": 0.8124, "step": 12070 }, { "epoch": 0.4263694925581375, "grad_norm": 1.6572126150131226, "learning_rate": 6.415821072983785e-06, "loss": 0.8181, "step": 12071 }, { "epoch": 0.4264048143618454, "grad_norm": 1.5422320365905762, "learning_rate": 6.4152724653821455e-06, "loss": 0.7868, "step": 12072 }, { "epoch": 0.4264401361655533, "grad_norm": 1.600761890411377, "learning_rate": 6.414723839257684e-06, "loss": 0.7917, "step": 12073 }, { "epoch": 0.4264754579692612, "grad_norm": 1.7063734531402588, "learning_rate": 6.414175194617584e-06, "loss": 0.7836, "step": 12074 }, { "epoch": 0.4265107797729691, "grad_norm": 1.671099066734314, "learning_rate": 6.413626531469025e-06, "loss": 0.818, "step": 12075 }, { "epoch": 0.426546101576677, "grad_norm": 1.4951813220977783, "learning_rate": 6.413077849819187e-06, "loss": 0.7777, "step": 12076 }, { "epoch": 0.4265814233803849, "grad_norm": 1.7476999759674072, "learning_rate": 6.412529149675253e-06, "loss": 0.8009, "step": 12077 }, { "epoch": 0.4266167451840928, "grad_norm": 1.5890021324157715, "learning_rate": 6.411980431044401e-06, "loss": 0.7868, "step": 12078 }, { "epoch": 0.4266520669878007, "grad_norm": 1.527423620223999, "learning_rate": 6.411431693933816e-06, "loss": 0.794, "step": 12079 }, { "epoch": 0.42668738879150864, "grad_norm": 1.8318959474563599, "learning_rate": 6.410882938350678e-06, "loss": 0.8116, "step": 12080 }, { "epoch": 0.42672271059521655, "grad_norm": 1.5894908905029297, "learning_rate": 6.410334164302169e-06, "loss": 0.8237, "step": 12081 }, { "epoch": 0.42675803239892446, "grad_norm": 1.7194292545318604, "learning_rate": 6.409785371795472e-06, "loss": 0.8212, "step": 12082 }, { "epoch": 0.42679335420263237, "grad_norm": 1.6419790983200073, "learning_rate": 6.409236560837769e-06, "loss": 0.8401, "step": 12083 }, { "epoch": 0.4268286760063403, "grad_norm": 1.7808634042739868, "learning_rate": 6.408687731436243e-06, "loss": 0.8364, "step": 12084 }, { "epoch": 0.4268639978100482, "grad_norm": 1.6870646476745605, "learning_rate": 6.408138883598077e-06, "loss": 0.8228, "step": 12085 }, { "epoch": 0.4268993196137561, "grad_norm": 1.7124768495559692, "learning_rate": 6.407590017330453e-06, "loss": 0.8118, "step": 12086 }, { "epoch": 0.42693464141746396, "grad_norm": 1.5610313415527344, "learning_rate": 6.4070411326405555e-06, "loss": 0.7936, "step": 12087 }, { "epoch": 0.42696996322117187, "grad_norm": 1.596617579460144, "learning_rate": 6.406492229535568e-06, "loss": 0.8027, "step": 12088 }, { "epoch": 0.4270052850248798, "grad_norm": 1.6425036191940308, "learning_rate": 6.405943308022675e-06, "loss": 0.814, "step": 12089 }, { "epoch": 0.4270406068285877, "grad_norm": 1.7153164148330688, "learning_rate": 6.405394368109061e-06, "loss": 0.797, "step": 12090 }, { "epoch": 0.4270759286322956, "grad_norm": 1.9133893251419067, "learning_rate": 6.404845409801909e-06, "loss": 0.7982, "step": 12091 }, { "epoch": 0.4271112504360035, "grad_norm": 1.5368502140045166, "learning_rate": 6.404296433108405e-06, "loss": 0.8075, "step": 12092 }, { "epoch": 0.4271465722397114, "grad_norm": 1.8444849252700806, "learning_rate": 6.403747438035733e-06, "loss": 0.808, "step": 12093 }, { "epoch": 0.42718189404341933, "grad_norm": 1.7565337419509888, "learning_rate": 6.403198424591078e-06, "loss": 0.8324, "step": 12094 }, { "epoch": 0.42721721584712724, "grad_norm": 1.5212408304214478, "learning_rate": 6.402649392781626e-06, "loss": 0.8019, "step": 12095 }, { "epoch": 0.42725253765083515, "grad_norm": 1.7499021291732788, "learning_rate": 6.402100342614561e-06, "loss": 0.8334, "step": 12096 }, { "epoch": 0.42728785945454306, "grad_norm": 1.6493723392486572, "learning_rate": 6.401551274097073e-06, "loss": 0.815, "step": 12097 }, { "epoch": 0.427323181258251, "grad_norm": 1.712296962738037, "learning_rate": 6.401002187236344e-06, "loss": 0.8463, "step": 12098 }, { "epoch": 0.4273585030619589, "grad_norm": 1.8004474639892578, "learning_rate": 6.400453082039561e-06, "loss": 0.8144, "step": 12099 }, { "epoch": 0.42739382486566674, "grad_norm": 1.641279697418213, "learning_rate": 6.399903958513912e-06, "loss": 0.7846, "step": 12100 }, { "epoch": 0.42742914666937465, "grad_norm": 2.3679165840148926, "learning_rate": 6.399354816666584e-06, "loss": 0.8334, "step": 12101 }, { "epoch": 0.42746446847308256, "grad_norm": 1.816652774810791, "learning_rate": 6.398805656504763e-06, "loss": 0.7727, "step": 12102 }, { "epoch": 0.42749979027679047, "grad_norm": 1.6236193180084229, "learning_rate": 6.398256478035636e-06, "loss": 0.8049, "step": 12103 }, { "epoch": 0.4275351120804984, "grad_norm": 1.6863112449645996, "learning_rate": 6.3977072812663895e-06, "loss": 0.8275, "step": 12104 }, { "epoch": 0.4275704338842063, "grad_norm": 1.481162190437317, "learning_rate": 6.397158066204215e-06, "loss": 0.778, "step": 12105 }, { "epoch": 0.4276057556879142, "grad_norm": 1.547791838645935, "learning_rate": 6.396608832856298e-06, "loss": 0.8037, "step": 12106 }, { "epoch": 0.4276410774916221, "grad_norm": 1.5942116975784302, "learning_rate": 6.396059581229827e-06, "loss": 0.8223, "step": 12107 }, { "epoch": 0.42767639929533, "grad_norm": 1.8570798635482788, "learning_rate": 6.395510311331989e-06, "loss": 0.7893, "step": 12108 }, { "epoch": 0.42771172109903793, "grad_norm": 1.7339093685150146, "learning_rate": 6.394961023169977e-06, "loss": 0.8081, "step": 12109 }, { "epoch": 0.42774704290274584, "grad_norm": 1.6938022375106812, "learning_rate": 6.394411716750976e-06, "loss": 0.8515, "step": 12110 }, { "epoch": 0.42778236470645375, "grad_norm": 1.6250355243682861, "learning_rate": 6.393862392082177e-06, "loss": 0.8152, "step": 12111 }, { "epoch": 0.42781768651016167, "grad_norm": 1.6687153577804565, "learning_rate": 6.393313049170768e-06, "loss": 0.8278, "step": 12112 }, { "epoch": 0.4278530083138695, "grad_norm": 1.624611735343933, "learning_rate": 6.392763688023942e-06, "loss": 0.8108, "step": 12113 }, { "epoch": 0.42788833011757743, "grad_norm": 1.727014183998108, "learning_rate": 6.392214308648884e-06, "loss": 0.8059, "step": 12114 }, { "epoch": 0.42792365192128534, "grad_norm": 1.5550050735473633, "learning_rate": 6.391664911052788e-06, "loss": 0.7943, "step": 12115 }, { "epoch": 0.42795897372499325, "grad_norm": 1.830433964729309, "learning_rate": 6.391115495242843e-06, "loss": 0.8147, "step": 12116 }, { "epoch": 0.42799429552870116, "grad_norm": 1.5322651863098145, "learning_rate": 6.39056606122624e-06, "loss": 0.792, "step": 12117 }, { "epoch": 0.4280296173324091, "grad_norm": 1.0736583471298218, "learning_rate": 6.39001660901017e-06, "loss": 0.6087, "step": 12118 }, { "epoch": 0.428064939136117, "grad_norm": 1.8865126371383667, "learning_rate": 6.389467138601825e-06, "loss": 0.8706, "step": 12119 }, { "epoch": 0.4281002609398249, "grad_norm": 1.5735946893692017, "learning_rate": 6.388917650008395e-06, "loss": 0.8049, "step": 12120 }, { "epoch": 0.4281355827435328, "grad_norm": 1.637959599494934, "learning_rate": 6.38836814323707e-06, "loss": 0.7604, "step": 12121 }, { "epoch": 0.4281709045472407, "grad_norm": 1.7712472677230835, "learning_rate": 6.387818618295045e-06, "loss": 0.8263, "step": 12122 }, { "epoch": 0.4282062263509486, "grad_norm": 1.5917125940322876, "learning_rate": 6.3872690751895105e-06, "loss": 0.7959, "step": 12123 }, { "epoch": 0.42824154815465654, "grad_norm": 1.64151930809021, "learning_rate": 6.38671951392766e-06, "loss": 0.8328, "step": 12124 }, { "epoch": 0.42827686995836445, "grad_norm": 1.7805356979370117, "learning_rate": 6.386169934516683e-06, "loss": 0.7817, "step": 12125 }, { "epoch": 0.4283121917620723, "grad_norm": 1.8423892259597778, "learning_rate": 6.385620336963776e-06, "loss": 0.8331, "step": 12126 }, { "epoch": 0.4283475135657802, "grad_norm": 1.5776830911636353, "learning_rate": 6.385070721276131e-06, "loss": 0.7841, "step": 12127 }, { "epoch": 0.4283828353694881, "grad_norm": 1.7753797769546509, "learning_rate": 6.38452108746094e-06, "loss": 0.7885, "step": 12128 }, { "epoch": 0.42841815717319603, "grad_norm": 1.713821530342102, "learning_rate": 6.383971435525398e-06, "loss": 0.7937, "step": 12129 }, { "epoch": 0.42845347897690395, "grad_norm": 1.7213397026062012, "learning_rate": 6.383421765476696e-06, "loss": 0.799, "step": 12130 }, { "epoch": 0.42848880078061186, "grad_norm": 1.034551978111267, "learning_rate": 6.382872077322032e-06, "loss": 0.6272, "step": 12131 }, { "epoch": 0.42852412258431977, "grad_norm": 1.6744612455368042, "learning_rate": 6.382322371068598e-06, "loss": 0.8237, "step": 12132 }, { "epoch": 0.4285594443880277, "grad_norm": 1.561725378036499, "learning_rate": 6.381772646723588e-06, "loss": 0.7737, "step": 12133 }, { "epoch": 0.4285947661917356, "grad_norm": 1.581314206123352, "learning_rate": 6.3812229042941975e-06, "loss": 0.8259, "step": 12134 }, { "epoch": 0.4286300879954435, "grad_norm": 1.5541934967041016, "learning_rate": 6.3806731437876215e-06, "loss": 0.7927, "step": 12135 }, { "epoch": 0.4286654097991514, "grad_norm": 1.8252564668655396, "learning_rate": 6.3801233652110536e-06, "loss": 0.781, "step": 12136 }, { "epoch": 0.4287007316028593, "grad_norm": 1.6176176071166992, "learning_rate": 6.379573568571693e-06, "loss": 0.846, "step": 12137 }, { "epoch": 0.42873605340656723, "grad_norm": 1.424778938293457, "learning_rate": 6.379023753876729e-06, "loss": 0.8029, "step": 12138 }, { "epoch": 0.4287713752102751, "grad_norm": 1.737435221672058, "learning_rate": 6.378473921133367e-06, "loss": 0.7884, "step": 12139 }, { "epoch": 0.428806697013983, "grad_norm": 1.7059739828109741, "learning_rate": 6.3779240703487944e-06, "loss": 0.7968, "step": 12140 }, { "epoch": 0.4288420188176909, "grad_norm": 1.663382649421692, "learning_rate": 6.37737420153021e-06, "loss": 0.8097, "step": 12141 }, { "epoch": 0.4288773406213988, "grad_norm": 1.643101692199707, "learning_rate": 6.3768243146848104e-06, "loss": 0.7764, "step": 12142 }, { "epoch": 0.4289126624251067, "grad_norm": 1.6565908193588257, "learning_rate": 6.376274409819794e-06, "loss": 0.8208, "step": 12143 }, { "epoch": 0.42894798422881464, "grad_norm": 1.596512794494629, "learning_rate": 6.375724486942357e-06, "loss": 0.7977, "step": 12144 }, { "epoch": 0.42898330603252255, "grad_norm": 1.8352857828140259, "learning_rate": 6.375174546059697e-06, "loss": 0.8484, "step": 12145 }, { "epoch": 0.42901862783623046, "grad_norm": 9.038557052612305, "learning_rate": 6.3746245871790105e-06, "loss": 0.8152, "step": 12146 }, { "epoch": 0.42905394963993837, "grad_norm": 1.7141811847686768, "learning_rate": 6.3740746103074955e-06, "loss": 0.7795, "step": 12147 }, { "epoch": 0.4290892714436463, "grad_norm": 1.6757395267486572, "learning_rate": 6.373524615452351e-06, "loss": 0.8215, "step": 12148 }, { "epoch": 0.4291245932473542, "grad_norm": 1.674038052558899, "learning_rate": 6.372974602620774e-06, "loss": 0.7946, "step": 12149 }, { "epoch": 0.4291599150510621, "grad_norm": 1.6336768865585327, "learning_rate": 6.372424571819964e-06, "loss": 0.8276, "step": 12150 }, { "epoch": 0.42919523685477, "grad_norm": 1.7558753490447998, "learning_rate": 6.371874523057118e-06, "loss": 0.8241, "step": 12151 }, { "epoch": 0.42923055865847787, "grad_norm": 1.7104434967041016, "learning_rate": 6.371324456339438e-06, "loss": 0.7969, "step": 12152 }, { "epoch": 0.4292658804621858, "grad_norm": 1.4954345226287842, "learning_rate": 6.37077437167412e-06, "loss": 0.7969, "step": 12153 }, { "epoch": 0.4293012022658937, "grad_norm": 1.715151309967041, "learning_rate": 6.370224269068364e-06, "loss": 0.816, "step": 12154 }, { "epoch": 0.4293365240696016, "grad_norm": 1.4959973096847534, "learning_rate": 6.369674148529372e-06, "loss": 0.8072, "step": 12155 }, { "epoch": 0.4293718458733095, "grad_norm": 1.7643946409225464, "learning_rate": 6.369124010064342e-06, "loss": 0.8055, "step": 12156 }, { "epoch": 0.4294071676770174, "grad_norm": 1.565873622894287, "learning_rate": 6.368573853680474e-06, "loss": 0.8266, "step": 12157 }, { "epoch": 0.42944248948072533, "grad_norm": 1.8845546245574951, "learning_rate": 6.3680236793849696e-06, "loss": 0.7972, "step": 12158 }, { "epoch": 0.42947781128443324, "grad_norm": 1.683975100517273, "learning_rate": 6.3674734871850255e-06, "loss": 0.7932, "step": 12159 }, { "epoch": 0.42951313308814115, "grad_norm": 1.6560261249542236, "learning_rate": 6.366923277087848e-06, "loss": 0.8083, "step": 12160 }, { "epoch": 0.42954845489184906, "grad_norm": 1.7279199361801147, "learning_rate": 6.366373049100635e-06, "loss": 0.8086, "step": 12161 }, { "epoch": 0.429583776695557, "grad_norm": 1.7319390773773193, "learning_rate": 6.365822803230588e-06, "loss": 0.8268, "step": 12162 }, { "epoch": 0.4296190984992649, "grad_norm": 1.6156915426254272, "learning_rate": 6.365272539484909e-06, "loss": 0.7733, "step": 12163 }, { "epoch": 0.4296544203029728, "grad_norm": 1.6221667528152466, "learning_rate": 6.3647222578708e-06, "loss": 0.794, "step": 12164 }, { "epoch": 0.42968974210668065, "grad_norm": 1.8228967189788818, "learning_rate": 6.364171958395462e-06, "loss": 0.8243, "step": 12165 }, { "epoch": 0.42972506391038856, "grad_norm": 1.701203465461731, "learning_rate": 6.363621641066099e-06, "loss": 0.8123, "step": 12166 }, { "epoch": 0.42976038571409647, "grad_norm": 1.7677395343780518, "learning_rate": 6.363071305889911e-06, "loss": 0.7984, "step": 12167 }, { "epoch": 0.4297957075178044, "grad_norm": 1.6230210065841675, "learning_rate": 6.362520952874103e-06, "loss": 0.804, "step": 12168 }, { "epoch": 0.4298310293215123, "grad_norm": 1.679916262626648, "learning_rate": 6.361970582025877e-06, "loss": 0.8093, "step": 12169 }, { "epoch": 0.4298663511252202, "grad_norm": 1.5647907257080078, "learning_rate": 6.361420193352435e-06, "loss": 0.7978, "step": 12170 }, { "epoch": 0.4299016729289281, "grad_norm": 1.5710270404815674, "learning_rate": 6.360869786860982e-06, "loss": 0.8287, "step": 12171 }, { "epoch": 0.429936994732636, "grad_norm": 1.6222420930862427, "learning_rate": 6.36031936255872e-06, "loss": 0.8004, "step": 12172 }, { "epoch": 0.42997231653634393, "grad_norm": 1.7968792915344238, "learning_rate": 6.3597689204528556e-06, "loss": 0.7934, "step": 12173 }, { "epoch": 0.43000763834005185, "grad_norm": 1.6036063432693481, "learning_rate": 6.3592184605505906e-06, "loss": 0.7828, "step": 12174 }, { "epoch": 0.43004296014375976, "grad_norm": 1.736207127571106, "learning_rate": 6.358667982859128e-06, "loss": 0.8383, "step": 12175 }, { "epoch": 0.43007828194746767, "grad_norm": 1.6143324375152588, "learning_rate": 6.3581174873856776e-06, "loss": 0.8263, "step": 12176 }, { "epoch": 0.4301136037511756, "grad_norm": 1.7646057605743408, "learning_rate": 6.357566974137439e-06, "loss": 0.8251, "step": 12177 }, { "epoch": 0.4301489255548835, "grad_norm": 1.566630482673645, "learning_rate": 6.3570164431216194e-06, "loss": 0.7523, "step": 12178 }, { "epoch": 0.43018424735859134, "grad_norm": 1.8774809837341309, "learning_rate": 6.356465894345424e-06, "loss": 0.8181, "step": 12179 }, { "epoch": 0.43021956916229925, "grad_norm": 1.4879413843154907, "learning_rate": 6.355915327816056e-06, "loss": 0.7896, "step": 12180 }, { "epoch": 0.43025489096600716, "grad_norm": 1.6186871528625488, "learning_rate": 6.355364743540725e-06, "loss": 0.8102, "step": 12181 }, { "epoch": 0.4302902127697151, "grad_norm": 1.6102142333984375, "learning_rate": 6.354814141526635e-06, "loss": 0.796, "step": 12182 }, { "epoch": 0.430325534573423, "grad_norm": 1.875011682510376, "learning_rate": 6.354263521780991e-06, "loss": 0.8335, "step": 12183 }, { "epoch": 0.4303608563771309, "grad_norm": 1.734802484512329, "learning_rate": 6.353712884311002e-06, "loss": 0.8288, "step": 12184 }, { "epoch": 0.4303961781808388, "grad_norm": 1.6341943740844727, "learning_rate": 6.353162229123872e-06, "loss": 0.8202, "step": 12185 }, { "epoch": 0.4304314999845467, "grad_norm": 1.8275775909423828, "learning_rate": 6.352611556226811e-06, "loss": 0.8316, "step": 12186 }, { "epoch": 0.4304668217882546, "grad_norm": 1.7074635028839111, "learning_rate": 6.352060865627023e-06, "loss": 0.7917, "step": 12187 }, { "epoch": 0.43050214359196254, "grad_norm": 1.7344943284988403, "learning_rate": 6.3515101573317154e-06, "loss": 0.7867, "step": 12188 }, { "epoch": 0.43053746539567045, "grad_norm": 1.573507308959961, "learning_rate": 6.350959431348098e-06, "loss": 0.7973, "step": 12189 }, { "epoch": 0.43057278719937836, "grad_norm": 1.5448222160339355, "learning_rate": 6.350408687683378e-06, "loss": 0.7968, "step": 12190 }, { "epoch": 0.43060810900308627, "grad_norm": 1.8186380863189697, "learning_rate": 6.349857926344763e-06, "loss": 0.8075, "step": 12191 }, { "epoch": 0.4306434308067941, "grad_norm": 1.689939260482788, "learning_rate": 6.3493071473394604e-06, "loss": 0.7966, "step": 12192 }, { "epoch": 0.43067875261050204, "grad_norm": 1.923162579536438, "learning_rate": 6.34875635067468e-06, "loss": 0.8463, "step": 12193 }, { "epoch": 0.43071407441420995, "grad_norm": 1.656652808189392, "learning_rate": 6.34820553635763e-06, "loss": 0.7761, "step": 12194 }, { "epoch": 0.43074939621791786, "grad_norm": 1.4626976251602173, "learning_rate": 6.34765470439552e-06, "loss": 0.7962, "step": 12195 }, { "epoch": 0.43078471802162577, "grad_norm": 2.273240089416504, "learning_rate": 6.347103854795557e-06, "loss": 0.7972, "step": 12196 }, { "epoch": 0.4308200398253337, "grad_norm": 1.4902169704437256, "learning_rate": 6.3465529875649525e-06, "loss": 0.7812, "step": 12197 }, { "epoch": 0.4308553616290416, "grad_norm": 1.8817576169967651, "learning_rate": 6.346002102710916e-06, "loss": 0.8122, "step": 12198 }, { "epoch": 0.4308906834327495, "grad_norm": 1.5719820261001587, "learning_rate": 6.345451200240655e-06, "loss": 0.831, "step": 12199 }, { "epoch": 0.4309260052364574, "grad_norm": 1.8249123096466064, "learning_rate": 6.3449002801613845e-06, "loss": 0.805, "step": 12200 }, { "epoch": 0.4309613270401653, "grad_norm": 1.5686575174331665, "learning_rate": 6.34434934248031e-06, "loss": 0.7896, "step": 12201 }, { "epoch": 0.43099664884387323, "grad_norm": 1.696575403213501, "learning_rate": 6.343798387204645e-06, "loss": 0.812, "step": 12202 }, { "epoch": 0.43103197064758114, "grad_norm": 1.626295566558838, "learning_rate": 6.3432474143416015e-06, "loss": 0.8314, "step": 12203 }, { "epoch": 0.43106729245128905, "grad_norm": 1.63587486743927, "learning_rate": 6.342696423898385e-06, "loss": 0.8181, "step": 12204 }, { "epoch": 0.4311026142549969, "grad_norm": 1.5433461666107178, "learning_rate": 6.342145415882212e-06, "loss": 0.7905, "step": 12205 }, { "epoch": 0.4311379360587048, "grad_norm": 1.6164402961730957, "learning_rate": 6.34159439030029e-06, "loss": 0.8193, "step": 12206 }, { "epoch": 0.43117325786241273, "grad_norm": 1.8127776384353638, "learning_rate": 6.341043347159834e-06, "loss": 0.78, "step": 12207 }, { "epoch": 0.43120857966612064, "grad_norm": 1.6186076402664185, "learning_rate": 6.340492286468055e-06, "loss": 0.7921, "step": 12208 }, { "epoch": 0.43124390146982855, "grad_norm": 1.549261212348938, "learning_rate": 6.339941208232165e-06, "loss": 0.8016, "step": 12209 }, { "epoch": 0.43127922327353646, "grad_norm": 1.589964747428894, "learning_rate": 6.339390112459376e-06, "loss": 0.8329, "step": 12210 }, { "epoch": 0.43131454507724437, "grad_norm": 1.7042346000671387, "learning_rate": 6.338838999156901e-06, "loss": 0.7873, "step": 12211 }, { "epoch": 0.4313498668809523, "grad_norm": 1.6778481006622314, "learning_rate": 6.338287868331953e-06, "loss": 0.8248, "step": 12212 }, { "epoch": 0.4313851886846602, "grad_norm": 1.7927297353744507, "learning_rate": 6.337736719991745e-06, "loss": 0.8121, "step": 12213 }, { "epoch": 0.4314205104883681, "grad_norm": 1.8351398706436157, "learning_rate": 6.33718555414349e-06, "loss": 0.8096, "step": 12214 }, { "epoch": 0.431455832292076, "grad_norm": 1.9241632223129272, "learning_rate": 6.336634370794402e-06, "loss": 0.8353, "step": 12215 }, { "epoch": 0.4314911540957839, "grad_norm": 1.561133623123169, "learning_rate": 6.336083169951695e-06, "loss": 0.8299, "step": 12216 }, { "epoch": 0.43152647589949183, "grad_norm": 1.618505597114563, "learning_rate": 6.335531951622582e-06, "loss": 0.8165, "step": 12217 }, { "epoch": 0.4315617977031997, "grad_norm": 1.7870652675628662, "learning_rate": 6.334980715814278e-06, "loss": 0.7919, "step": 12218 }, { "epoch": 0.4315971195069076, "grad_norm": 1.8609468936920166, "learning_rate": 6.334429462533998e-06, "loss": 0.8142, "step": 12219 }, { "epoch": 0.4316324413106155, "grad_norm": 1.5170854330062866, "learning_rate": 6.333878191788955e-06, "loss": 0.8083, "step": 12220 }, { "epoch": 0.4316677631143234, "grad_norm": 1.7000596523284912, "learning_rate": 6.333326903586365e-06, "loss": 0.761, "step": 12221 }, { "epoch": 0.43170308491803133, "grad_norm": 1.646572232246399, "learning_rate": 6.332775597933445e-06, "loss": 0.7744, "step": 12222 }, { "epoch": 0.43173840672173924, "grad_norm": 1.8043699264526367, "learning_rate": 6.332224274837406e-06, "loss": 0.7843, "step": 12223 }, { "epoch": 0.43177372852544715, "grad_norm": 1.6926766633987427, "learning_rate": 6.331672934305468e-06, "loss": 0.7977, "step": 12224 }, { "epoch": 0.43180905032915506, "grad_norm": 1.6039109230041504, "learning_rate": 6.331121576344843e-06, "loss": 0.797, "step": 12225 }, { "epoch": 0.431844372132863, "grad_norm": 2.0218265056610107, "learning_rate": 6.3305702009627515e-06, "loss": 0.7731, "step": 12226 }, { "epoch": 0.4318796939365709, "grad_norm": 1.599782943725586, "learning_rate": 6.330018808166406e-06, "loss": 0.8408, "step": 12227 }, { "epoch": 0.4319150157402788, "grad_norm": 1.8027654886245728, "learning_rate": 6.329467397963024e-06, "loss": 0.7946, "step": 12228 }, { "epoch": 0.4319503375439867, "grad_norm": 2.0073060989379883, "learning_rate": 6.328915970359824e-06, "loss": 0.8175, "step": 12229 }, { "epoch": 0.4319856593476946, "grad_norm": 1.7099500894546509, "learning_rate": 6.328364525364019e-06, "loss": 0.8126, "step": 12230 }, { "epoch": 0.4320209811514025, "grad_norm": 1.8822659254074097, "learning_rate": 6.327813062982832e-06, "loss": 0.793, "step": 12231 }, { "epoch": 0.4320563029551104, "grad_norm": 1.8777687549591064, "learning_rate": 6.327261583223475e-06, "loss": 0.7857, "step": 12232 }, { "epoch": 0.4320916247588183, "grad_norm": 1.6383534669876099, "learning_rate": 6.326710086093169e-06, "loss": 0.7787, "step": 12233 }, { "epoch": 0.4321269465625262, "grad_norm": 1.848071813583374, "learning_rate": 6.32615857159913e-06, "loss": 0.827, "step": 12234 }, { "epoch": 0.4321622683662341, "grad_norm": 1.5163060426712036, "learning_rate": 6.325607039748576e-06, "loss": 0.8337, "step": 12235 }, { "epoch": 0.432197590169942, "grad_norm": 1.5885876417160034, "learning_rate": 6.325055490548728e-06, "loss": 0.8038, "step": 12236 }, { "epoch": 0.43223291197364994, "grad_norm": 1.7360526323318481, "learning_rate": 6.324503924006803e-06, "loss": 0.8049, "step": 12237 }, { "epoch": 0.43226823377735785, "grad_norm": 1.7218676805496216, "learning_rate": 6.323952340130017e-06, "loss": 0.8257, "step": 12238 }, { "epoch": 0.43230355558106576, "grad_norm": 1.56560480594635, "learning_rate": 6.323400738925595e-06, "loss": 0.7817, "step": 12239 }, { "epoch": 0.43233887738477367, "grad_norm": 1.737546443939209, "learning_rate": 6.322849120400752e-06, "loss": 0.7973, "step": 12240 }, { "epoch": 0.4323741991884816, "grad_norm": 1.6560187339782715, "learning_rate": 6.322297484562707e-06, "loss": 0.7969, "step": 12241 }, { "epoch": 0.4324095209921895, "grad_norm": 1.6390899419784546, "learning_rate": 6.321745831418683e-06, "loss": 0.7856, "step": 12242 }, { "epoch": 0.4324448427958974, "grad_norm": 1.783954381942749, "learning_rate": 6.321194160975897e-06, "loss": 0.7948, "step": 12243 }, { "epoch": 0.43248016459960525, "grad_norm": 1.6687147617340088, "learning_rate": 6.320642473241569e-06, "loss": 0.8315, "step": 12244 }, { "epoch": 0.43251548640331317, "grad_norm": 1.4593497514724731, "learning_rate": 6.3200907682229215e-06, "loss": 0.8212, "step": 12245 }, { "epoch": 0.4325508082070211, "grad_norm": 1.614925503730774, "learning_rate": 6.3195390459271744e-06, "loss": 0.8313, "step": 12246 }, { "epoch": 0.432586130010729, "grad_norm": 1.6702001094818115, "learning_rate": 6.318987306361548e-06, "loss": 0.7738, "step": 12247 }, { "epoch": 0.4326214518144369, "grad_norm": 1.6200816631317139, "learning_rate": 6.318435549533265e-06, "loss": 0.8332, "step": 12248 }, { "epoch": 0.4326567736181448, "grad_norm": 1.4270555973052979, "learning_rate": 6.317883775449545e-06, "loss": 0.7867, "step": 12249 }, { "epoch": 0.4326920954218527, "grad_norm": 1.751388430595398, "learning_rate": 6.317331984117609e-06, "loss": 0.8356, "step": 12250 }, { "epoch": 0.43272741722556063, "grad_norm": 1.6019712686538696, "learning_rate": 6.316780175544679e-06, "loss": 0.8034, "step": 12251 }, { "epoch": 0.43276273902926854, "grad_norm": 1.794857144355774, "learning_rate": 6.316228349737978e-06, "loss": 0.8324, "step": 12252 }, { "epoch": 0.43279806083297645, "grad_norm": 1.6577178239822388, "learning_rate": 6.3156765067047285e-06, "loss": 0.8253, "step": 12253 }, { "epoch": 0.43283338263668436, "grad_norm": 1.5504589080810547, "learning_rate": 6.315124646452152e-06, "loss": 0.8077, "step": 12254 }, { "epoch": 0.43286870444039227, "grad_norm": 1.5839052200317383, "learning_rate": 6.314572768987472e-06, "loss": 0.8126, "step": 12255 }, { "epoch": 0.4329040262441002, "grad_norm": 1.5471080541610718, "learning_rate": 6.31402087431791e-06, "loss": 0.7878, "step": 12256 }, { "epoch": 0.43293934804780804, "grad_norm": 1.5783709287643433, "learning_rate": 6.313468962450688e-06, "loss": 0.77, "step": 12257 }, { "epoch": 0.43297466985151595, "grad_norm": 1.4941353797912598, "learning_rate": 6.312917033393033e-06, "loss": 0.7815, "step": 12258 }, { "epoch": 0.43300999165522386, "grad_norm": 1.6704195737838745, "learning_rate": 6.312365087152168e-06, "loss": 0.8115, "step": 12259 }, { "epoch": 0.43304531345893177, "grad_norm": 1.600595474243164, "learning_rate": 6.311813123735314e-06, "loss": 0.7744, "step": 12260 }, { "epoch": 0.4330806352626397, "grad_norm": 1.8035883903503418, "learning_rate": 6.311261143149696e-06, "loss": 0.8275, "step": 12261 }, { "epoch": 0.4331159570663476, "grad_norm": 1.755155324935913, "learning_rate": 6.31070914540254e-06, "loss": 0.8467, "step": 12262 }, { "epoch": 0.4331512788700555, "grad_norm": 1.7569751739501953, "learning_rate": 6.310157130501068e-06, "loss": 0.8167, "step": 12263 }, { "epoch": 0.4331866006737634, "grad_norm": 1.5871639251708984, "learning_rate": 6.309605098452505e-06, "loss": 0.8176, "step": 12264 }, { "epoch": 0.4332219224774713, "grad_norm": 1.6310994625091553, "learning_rate": 6.3090530492640775e-06, "loss": 0.8046, "step": 12265 }, { "epoch": 0.43325724428117923, "grad_norm": 1.7898938655853271, "learning_rate": 6.30850098294301e-06, "loss": 0.7979, "step": 12266 }, { "epoch": 0.43329256608488714, "grad_norm": 1.5738457441329956, "learning_rate": 6.307948899496526e-06, "loss": 0.8041, "step": 12267 }, { "epoch": 0.43332788788859505, "grad_norm": 1.666811227798462, "learning_rate": 6.307396798931856e-06, "loss": 0.7705, "step": 12268 }, { "epoch": 0.43336320969230296, "grad_norm": 1.6003944873809814, "learning_rate": 6.30684468125622e-06, "loss": 0.7571, "step": 12269 }, { "epoch": 0.4333985314960108, "grad_norm": 1.6618828773498535, "learning_rate": 6.306292546476847e-06, "loss": 0.8241, "step": 12270 }, { "epoch": 0.43343385329971873, "grad_norm": 1.9009209871292114, "learning_rate": 6.305740394600963e-06, "loss": 0.8281, "step": 12271 }, { "epoch": 0.43346917510342664, "grad_norm": 1.636621117591858, "learning_rate": 6.305188225635794e-06, "loss": 0.7926, "step": 12272 }, { "epoch": 0.43350449690713455, "grad_norm": 1.5024998188018799, "learning_rate": 6.3046360395885665e-06, "loss": 0.7824, "step": 12273 }, { "epoch": 0.43353981871084246, "grad_norm": 0.9943972826004028, "learning_rate": 6.304083836466508e-06, "loss": 0.609, "step": 12274 }, { "epoch": 0.4335751405145504, "grad_norm": 1.7540382146835327, "learning_rate": 6.303531616276845e-06, "loss": 0.8115, "step": 12275 }, { "epoch": 0.4336104623182583, "grad_norm": 1.5853550434112549, "learning_rate": 6.3029793790268055e-06, "loss": 0.7727, "step": 12276 }, { "epoch": 0.4336457841219662, "grad_norm": 1.7922905683517456, "learning_rate": 6.302427124723618e-06, "loss": 0.8098, "step": 12277 }, { "epoch": 0.4336811059256741, "grad_norm": 1.5574744939804077, "learning_rate": 6.301874853374508e-06, "loss": 0.7741, "step": 12278 }, { "epoch": 0.433716427729382, "grad_norm": 1.7778536081314087, "learning_rate": 6.301322564986705e-06, "loss": 0.8003, "step": 12279 }, { "epoch": 0.4337517495330899, "grad_norm": 1.8002926111221313, "learning_rate": 6.300770259567437e-06, "loss": 0.801, "step": 12280 }, { "epoch": 0.43378707133679784, "grad_norm": 1.5851994752883911, "learning_rate": 6.3002179371239315e-06, "loss": 0.7857, "step": 12281 }, { "epoch": 0.43382239314050575, "grad_norm": 1.7499268054962158, "learning_rate": 6.299665597663419e-06, "loss": 0.8238, "step": 12282 }, { "epoch": 0.4338577149442136, "grad_norm": 1.6980823278427124, "learning_rate": 6.2991132411931265e-06, "loss": 0.8139, "step": 12283 }, { "epoch": 0.4338930367479215, "grad_norm": 1.9747294187545776, "learning_rate": 6.298560867720284e-06, "loss": 0.7996, "step": 12284 }, { "epoch": 0.4339283585516294, "grad_norm": 1.7524852752685547, "learning_rate": 6.2980084772521226e-06, "loss": 0.8095, "step": 12285 }, { "epoch": 0.43396368035533733, "grad_norm": 2.6454017162323, "learning_rate": 6.29745606979587e-06, "loss": 0.8226, "step": 12286 }, { "epoch": 0.43399900215904524, "grad_norm": 1.7486294507980347, "learning_rate": 6.296903645358755e-06, "loss": 0.8406, "step": 12287 }, { "epoch": 0.43403432396275315, "grad_norm": 0.9574692249298096, "learning_rate": 6.296351203948009e-06, "loss": 0.581, "step": 12288 }, { "epoch": 0.43406964576646107, "grad_norm": 1.6601814031600952, "learning_rate": 6.295798745570863e-06, "loss": 0.7892, "step": 12289 }, { "epoch": 0.434104967570169, "grad_norm": 1.6180918216705322, "learning_rate": 6.295246270234547e-06, "loss": 0.8228, "step": 12290 }, { "epoch": 0.4341402893738769, "grad_norm": 1.835997462272644, "learning_rate": 6.294693777946289e-06, "loss": 0.8179, "step": 12291 }, { "epoch": 0.4341756111775848, "grad_norm": 2.2835729122161865, "learning_rate": 6.294141268713324e-06, "loss": 0.7983, "step": 12292 }, { "epoch": 0.4342109329812927, "grad_norm": 1.5660065412521362, "learning_rate": 6.29358874254288e-06, "loss": 0.8056, "step": 12293 }, { "epoch": 0.4342462547850006, "grad_norm": 1.6761653423309326, "learning_rate": 6.2930361994421904e-06, "loss": 0.8197, "step": 12294 }, { "epoch": 0.43428157658870853, "grad_norm": 1.5767371654510498, "learning_rate": 6.292483639418488e-06, "loss": 0.7821, "step": 12295 }, { "epoch": 0.4343168983924164, "grad_norm": 1.6798352003097534, "learning_rate": 6.291931062479001e-06, "loss": 0.8349, "step": 12296 }, { "epoch": 0.4343522201961243, "grad_norm": 1.6753623485565186, "learning_rate": 6.291378468630963e-06, "loss": 0.7984, "step": 12297 }, { "epoch": 0.4343875419998322, "grad_norm": 1.7597966194152832, "learning_rate": 6.290825857881606e-06, "loss": 0.768, "step": 12298 }, { "epoch": 0.4344228638035401, "grad_norm": 1.7337945699691772, "learning_rate": 6.290273230238163e-06, "loss": 0.8288, "step": 12299 }, { "epoch": 0.434458185607248, "grad_norm": 1.635316014289856, "learning_rate": 6.289720585707868e-06, "loss": 0.8059, "step": 12300 }, { "epoch": 0.43449350741095594, "grad_norm": 1.6706074476242065, "learning_rate": 6.28916792429795e-06, "loss": 0.8367, "step": 12301 }, { "epoch": 0.43452882921466385, "grad_norm": 1.7251675128936768, "learning_rate": 6.288615246015645e-06, "loss": 0.809, "step": 12302 }, { "epoch": 0.43456415101837176, "grad_norm": 1.7386597394943237, "learning_rate": 6.288062550868188e-06, "loss": 0.7871, "step": 12303 }, { "epoch": 0.43459947282207967, "grad_norm": 1.69247305393219, "learning_rate": 6.287509838862809e-06, "loss": 0.8296, "step": 12304 }, { "epoch": 0.4346347946257876, "grad_norm": 1.8090081214904785, "learning_rate": 6.286957110006744e-06, "loss": 0.803, "step": 12305 }, { "epoch": 0.4346701164294955, "grad_norm": 1.899969458580017, "learning_rate": 6.286404364307225e-06, "loss": 0.8135, "step": 12306 }, { "epoch": 0.4347054382332034, "grad_norm": 1.9897114038467407, "learning_rate": 6.285851601771489e-06, "loss": 0.8049, "step": 12307 }, { "epoch": 0.4347407600369113, "grad_norm": 1.5127869844436646, "learning_rate": 6.285298822406768e-06, "loss": 0.7858, "step": 12308 }, { "epoch": 0.43477608184061917, "grad_norm": 1.8823481798171997, "learning_rate": 6.284746026220296e-06, "loss": 0.8172, "step": 12309 }, { "epoch": 0.4348114036443271, "grad_norm": 1.784123420715332, "learning_rate": 6.284193213219313e-06, "loss": 0.8134, "step": 12310 }, { "epoch": 0.434846725448035, "grad_norm": 1.8864073753356934, "learning_rate": 6.283640383411049e-06, "loss": 0.7835, "step": 12311 }, { "epoch": 0.4348820472517429, "grad_norm": 1.9565305709838867, "learning_rate": 6.28308753680274e-06, "loss": 0.8791, "step": 12312 }, { "epoch": 0.4349173690554508, "grad_norm": 2.09375, "learning_rate": 6.2825346734016235e-06, "loss": 0.8262, "step": 12313 }, { "epoch": 0.4349526908591587, "grad_norm": 1.5654982328414917, "learning_rate": 6.281981793214934e-06, "loss": 0.8012, "step": 12314 }, { "epoch": 0.43498801266286663, "grad_norm": 2.0616214275360107, "learning_rate": 6.281428896249908e-06, "loss": 0.7857, "step": 12315 }, { "epoch": 0.43502333446657454, "grad_norm": 2.186431407928467, "learning_rate": 6.280875982513782e-06, "loss": 0.8171, "step": 12316 }, { "epoch": 0.43505865627028245, "grad_norm": 1.7455246448516846, "learning_rate": 6.2803230520137906e-06, "loss": 0.781, "step": 12317 }, { "epoch": 0.43509397807399036, "grad_norm": 1.5351901054382324, "learning_rate": 6.2797701047571725e-06, "loss": 0.7929, "step": 12318 }, { "epoch": 0.4351292998776983, "grad_norm": 1.8197782039642334, "learning_rate": 6.279217140751165e-06, "loss": 0.8334, "step": 12319 }, { "epoch": 0.4351646216814062, "grad_norm": 1.7097989320755005, "learning_rate": 6.278664160003002e-06, "loss": 0.7732, "step": 12320 }, { "epoch": 0.4351999434851141, "grad_norm": 1.0318090915679932, "learning_rate": 6.278111162519925e-06, "loss": 0.6066, "step": 12321 }, { "epoch": 0.43523526528882195, "grad_norm": 1.8588180541992188, "learning_rate": 6.277558148309166e-06, "loss": 0.8242, "step": 12322 }, { "epoch": 0.43527058709252986, "grad_norm": 1.6601907014846802, "learning_rate": 6.27700511737797e-06, "loss": 0.7811, "step": 12323 }, { "epoch": 0.43530590889623777, "grad_norm": 1.572184681892395, "learning_rate": 6.27645206973357e-06, "loss": 0.7915, "step": 12324 }, { "epoch": 0.4353412306999457, "grad_norm": 1.6760294437408447, "learning_rate": 6.275899005383203e-06, "loss": 0.8179, "step": 12325 }, { "epoch": 0.4353765525036536, "grad_norm": 2.0349948406219482, "learning_rate": 6.2753459243341105e-06, "loss": 0.8163, "step": 12326 }, { "epoch": 0.4354118743073615, "grad_norm": 1.8600598573684692, "learning_rate": 6.274792826593531e-06, "loss": 0.8026, "step": 12327 }, { "epoch": 0.4354471961110694, "grad_norm": 1.7410119771957397, "learning_rate": 6.274239712168703e-06, "loss": 0.8269, "step": 12328 }, { "epoch": 0.4354825179147773, "grad_norm": 1.6136400699615479, "learning_rate": 6.273686581066865e-06, "loss": 0.803, "step": 12329 }, { "epoch": 0.43551783971848523, "grad_norm": 1.673307180404663, "learning_rate": 6.273133433295255e-06, "loss": 0.8033, "step": 12330 }, { "epoch": 0.43555316152219314, "grad_norm": 1.7195504903793335, "learning_rate": 6.272580268861116e-06, "loss": 0.8231, "step": 12331 }, { "epoch": 0.43558848332590105, "grad_norm": 1.8384642601013184, "learning_rate": 6.272027087771685e-06, "loss": 0.8074, "step": 12332 }, { "epoch": 0.43562380512960897, "grad_norm": 1.739972710609436, "learning_rate": 6.271473890034203e-06, "loss": 0.8128, "step": 12333 }, { "epoch": 0.4356591269333169, "grad_norm": 1.7817575931549072, "learning_rate": 6.270920675655909e-06, "loss": 0.8282, "step": 12334 }, { "epoch": 0.43569444873702473, "grad_norm": 1.7382785081863403, "learning_rate": 6.270367444644043e-06, "loss": 0.8275, "step": 12335 }, { "epoch": 0.43572977054073264, "grad_norm": 1.824337363243103, "learning_rate": 6.269814197005849e-06, "loss": 0.7932, "step": 12336 }, { "epoch": 0.43576509234444055, "grad_norm": 1.5586916208267212, "learning_rate": 6.269260932748564e-06, "loss": 0.8061, "step": 12337 }, { "epoch": 0.43580041414814846, "grad_norm": 1.689704179763794, "learning_rate": 6.268707651879431e-06, "loss": 0.796, "step": 12338 }, { "epoch": 0.4358357359518564, "grad_norm": 1.5812383890151978, "learning_rate": 6.268154354405691e-06, "loss": 0.796, "step": 12339 }, { "epoch": 0.4358710577555643, "grad_norm": 1.4804812669754028, "learning_rate": 6.267601040334584e-06, "loss": 0.7813, "step": 12340 }, { "epoch": 0.4359063795592722, "grad_norm": 3.8800292015075684, "learning_rate": 6.2670477096733535e-06, "loss": 0.7913, "step": 12341 }, { "epoch": 0.4359417013629801, "grad_norm": 1.6792336702346802, "learning_rate": 6.266494362429243e-06, "loss": 0.7676, "step": 12342 }, { "epoch": 0.435977023166688, "grad_norm": 1.7667418718338013, "learning_rate": 6.265940998609489e-06, "loss": 0.8327, "step": 12343 }, { "epoch": 0.4360123449703959, "grad_norm": 1.7011414766311646, "learning_rate": 6.265387618221339e-06, "loss": 0.8118, "step": 12344 }, { "epoch": 0.43604766677410384, "grad_norm": 1.619964838027954, "learning_rate": 6.264834221272034e-06, "loss": 0.8107, "step": 12345 }, { "epoch": 0.43608298857781175, "grad_norm": 1.7426027059555054, "learning_rate": 6.264280807768814e-06, "loss": 0.8064, "step": 12346 }, { "epoch": 0.43611831038151966, "grad_norm": 1.5338319540023804, "learning_rate": 6.263727377718926e-06, "loss": 0.7653, "step": 12347 }, { "epoch": 0.4361536321852275, "grad_norm": 1.609876036643982, "learning_rate": 6.263173931129614e-06, "loss": 0.8785, "step": 12348 }, { "epoch": 0.4361889539889354, "grad_norm": 1.8182649612426758, "learning_rate": 6.262620468008115e-06, "loss": 0.7972, "step": 12349 }, { "epoch": 0.43622427579264333, "grad_norm": 1.7721384763717651, "learning_rate": 6.2620669883616795e-06, "loss": 0.8635, "step": 12350 }, { "epoch": 0.43625959759635125, "grad_norm": 1.7697546482086182, "learning_rate": 6.261513492197547e-06, "loss": 0.8014, "step": 12351 }, { "epoch": 0.43629491940005916, "grad_norm": 1.7826627492904663, "learning_rate": 6.2609599795229645e-06, "loss": 0.8248, "step": 12352 }, { "epoch": 0.43633024120376707, "grad_norm": 1.5880898237228394, "learning_rate": 6.260406450345173e-06, "loss": 0.8003, "step": 12353 }, { "epoch": 0.436365563007475, "grad_norm": 1.6075689792633057, "learning_rate": 6.2598529046714195e-06, "loss": 0.7997, "step": 12354 }, { "epoch": 0.4364008848111829, "grad_norm": 1.7936469316482544, "learning_rate": 6.259299342508949e-06, "loss": 0.8255, "step": 12355 }, { "epoch": 0.4364362066148908, "grad_norm": 1.611777901649475, "learning_rate": 6.258745763865004e-06, "loss": 0.7879, "step": 12356 }, { "epoch": 0.4364715284185987, "grad_norm": 0.9326983094215393, "learning_rate": 6.2581921687468315e-06, "loss": 0.5872, "step": 12357 }, { "epoch": 0.4365068502223066, "grad_norm": 1.857176423072815, "learning_rate": 6.2576385571616774e-06, "loss": 0.7974, "step": 12358 }, { "epoch": 0.43654217202601453, "grad_norm": 1.467105746269226, "learning_rate": 6.257084929116784e-06, "loss": 0.7861, "step": 12359 }, { "epoch": 0.43657749382972244, "grad_norm": 1.5955848693847656, "learning_rate": 6.256531284619403e-06, "loss": 0.7923, "step": 12360 }, { "epoch": 0.4366128156334303, "grad_norm": 1.6038508415222168, "learning_rate": 6.255977623676773e-06, "loss": 0.7985, "step": 12361 }, { "epoch": 0.4366481374371382, "grad_norm": 1.8143084049224854, "learning_rate": 6.255423946296146e-06, "loss": 0.8336, "step": 12362 }, { "epoch": 0.4366834592408461, "grad_norm": 1.7251569032669067, "learning_rate": 6.254870252484766e-06, "loss": 0.7851, "step": 12363 }, { "epoch": 0.436718781044554, "grad_norm": 2.3738157749176025, "learning_rate": 6.2543165422498794e-06, "loss": 0.7645, "step": 12364 }, { "epoch": 0.43675410284826194, "grad_norm": 1.7305604219436646, "learning_rate": 6.253762815598734e-06, "loss": 0.8134, "step": 12365 }, { "epoch": 0.43678942465196985, "grad_norm": 1.4778361320495605, "learning_rate": 6.253209072538578e-06, "loss": 0.7975, "step": 12366 }, { "epoch": 0.43682474645567776, "grad_norm": 1.997617244720459, "learning_rate": 6.252655313076654e-06, "loss": 0.7721, "step": 12367 }, { "epoch": 0.43686006825938567, "grad_norm": 1.7154109477996826, "learning_rate": 6.2521015372202165e-06, "loss": 0.8285, "step": 12368 }, { "epoch": 0.4368953900630936, "grad_norm": 1.5827336311340332, "learning_rate": 6.251547744976507e-06, "loss": 0.7671, "step": 12369 }, { "epoch": 0.4369307118668015, "grad_norm": 1.8379995822906494, "learning_rate": 6.250993936352777e-06, "loss": 0.8242, "step": 12370 }, { "epoch": 0.4369660336705094, "grad_norm": 1.6525771617889404, "learning_rate": 6.250440111356274e-06, "loss": 0.8181, "step": 12371 }, { "epoch": 0.4370013554742173, "grad_norm": 1.5552812814712524, "learning_rate": 6.249886269994244e-06, "loss": 0.787, "step": 12372 }, { "epoch": 0.4370366772779252, "grad_norm": 1.699361801147461, "learning_rate": 6.249332412273939e-06, "loss": 0.81, "step": 12373 }, { "epoch": 0.4370719990816331, "grad_norm": 1.7025350332260132, "learning_rate": 6.248778538202605e-06, "loss": 0.8296, "step": 12374 }, { "epoch": 0.437107320885341, "grad_norm": 1.6256107091903687, "learning_rate": 6.248224647787493e-06, "loss": 0.8245, "step": 12375 }, { "epoch": 0.4371426426890489, "grad_norm": 1.731974720954895, "learning_rate": 6.2476707410358515e-06, "loss": 0.8047, "step": 12376 }, { "epoch": 0.4371779644927568, "grad_norm": 1.6512665748596191, "learning_rate": 6.24711681795493e-06, "loss": 0.797, "step": 12377 }, { "epoch": 0.4372132862964647, "grad_norm": 2.536989450454712, "learning_rate": 6.246562878551977e-06, "loss": 0.784, "step": 12378 }, { "epoch": 0.43724860810017263, "grad_norm": 1.7202428579330444, "learning_rate": 6.246008922834246e-06, "loss": 0.8004, "step": 12379 }, { "epoch": 0.43728392990388054, "grad_norm": 1.632694959640503, "learning_rate": 6.245454950808981e-06, "loss": 0.8068, "step": 12380 }, { "epoch": 0.43731925170758845, "grad_norm": 1.6186870336532593, "learning_rate": 6.244900962483438e-06, "loss": 0.7932, "step": 12381 }, { "epoch": 0.43735457351129636, "grad_norm": 1.714299201965332, "learning_rate": 6.244346957864864e-06, "loss": 0.8054, "step": 12382 }, { "epoch": 0.4373898953150043, "grad_norm": 1.5427868366241455, "learning_rate": 6.243792936960511e-06, "loss": 0.783, "step": 12383 }, { "epoch": 0.4374252171187122, "grad_norm": 2.2677786350250244, "learning_rate": 6.243238899777631e-06, "loss": 0.7973, "step": 12384 }, { "epoch": 0.4374605389224201, "grad_norm": 2.27616810798645, "learning_rate": 6.242684846323472e-06, "loss": 0.8278, "step": 12385 }, { "epoch": 0.437495860726128, "grad_norm": 1.6162618398666382, "learning_rate": 6.242130776605289e-06, "loss": 0.7814, "step": 12386 }, { "epoch": 0.43753118252983586, "grad_norm": 2.372459650039673, "learning_rate": 6.241576690630332e-06, "loss": 0.8194, "step": 12387 }, { "epoch": 0.43756650433354377, "grad_norm": 1.5403321981430054, "learning_rate": 6.241022588405853e-06, "loss": 0.7795, "step": 12388 }, { "epoch": 0.4376018261372517, "grad_norm": 1.5274345874786377, "learning_rate": 6.2404684699391026e-06, "loss": 0.7936, "step": 12389 }, { "epoch": 0.4376371479409596, "grad_norm": 1.5380687713623047, "learning_rate": 6.239914335237334e-06, "loss": 0.7708, "step": 12390 }, { "epoch": 0.4376724697446675, "grad_norm": 1.770870327949524, "learning_rate": 6.2393601843078e-06, "loss": 0.8182, "step": 12391 }, { "epoch": 0.4377077915483754, "grad_norm": 1.4873872995376587, "learning_rate": 6.238806017157753e-06, "loss": 0.7958, "step": 12392 }, { "epoch": 0.4377431133520833, "grad_norm": 1.596873164176941, "learning_rate": 6.238251833794444e-06, "loss": 0.7732, "step": 12393 }, { "epoch": 0.43777843515579123, "grad_norm": 1.6953293085098267, "learning_rate": 6.2376976342251305e-06, "loss": 0.7884, "step": 12394 }, { "epoch": 0.43781375695949915, "grad_norm": 1.6767325401306152, "learning_rate": 6.237143418457061e-06, "loss": 0.798, "step": 12395 }, { "epoch": 0.43784907876320706, "grad_norm": 1.4701824188232422, "learning_rate": 6.236589186497491e-06, "loss": 0.7962, "step": 12396 }, { "epoch": 0.43788440056691497, "grad_norm": 1.7180132865905762, "learning_rate": 6.2360349383536765e-06, "loss": 0.8184, "step": 12397 }, { "epoch": 0.4379197223706229, "grad_norm": 1.6347817182540894, "learning_rate": 6.235480674032868e-06, "loss": 0.8259, "step": 12398 }, { "epoch": 0.4379550441743308, "grad_norm": 1.6452884674072266, "learning_rate": 6.23492639354232e-06, "loss": 0.8307, "step": 12399 }, { "epoch": 0.43799036597803864, "grad_norm": 1.5105390548706055, "learning_rate": 6.234372096889288e-06, "loss": 0.7943, "step": 12400 }, { "epoch": 0.43802568778174655, "grad_norm": 1.6071617603302002, "learning_rate": 6.233817784081024e-06, "loss": 0.7858, "step": 12401 }, { "epoch": 0.43806100958545446, "grad_norm": 1.5894889831542969, "learning_rate": 6.233263455124788e-06, "loss": 0.7842, "step": 12402 }, { "epoch": 0.4380963313891624, "grad_norm": 1.5941894054412842, "learning_rate": 6.23270911002783e-06, "loss": 0.8141, "step": 12403 }, { "epoch": 0.4381316531928703, "grad_norm": 1.5378398895263672, "learning_rate": 6.232154748797407e-06, "loss": 0.7707, "step": 12404 }, { "epoch": 0.4381669749965782, "grad_norm": 1.6959495544433594, "learning_rate": 6.231600371440774e-06, "loss": 0.8061, "step": 12405 }, { "epoch": 0.4382022968002861, "grad_norm": 1.6243680715560913, "learning_rate": 6.231045977965188e-06, "loss": 0.8108, "step": 12406 }, { "epoch": 0.438237618603994, "grad_norm": 1.5708434581756592, "learning_rate": 6.2304915683779035e-06, "loss": 0.7718, "step": 12407 }, { "epoch": 0.4382729404077019, "grad_norm": 0.9456398487091064, "learning_rate": 6.229937142686175e-06, "loss": 0.5666, "step": 12408 }, { "epoch": 0.43830826221140984, "grad_norm": 1.631651520729065, "learning_rate": 6.2293827008972615e-06, "loss": 0.8154, "step": 12409 }, { "epoch": 0.43834358401511775, "grad_norm": 1.7021598815917969, "learning_rate": 6.228828243018419e-06, "loss": 0.7913, "step": 12410 }, { "epoch": 0.43837890581882566, "grad_norm": 1.963915228843689, "learning_rate": 6.228273769056903e-06, "loss": 0.8201, "step": 12411 }, { "epoch": 0.43841422762253357, "grad_norm": 1.71104896068573, "learning_rate": 6.2277192790199695e-06, "loss": 0.7842, "step": 12412 }, { "epoch": 0.4384495494262414, "grad_norm": 1.7551220655441284, "learning_rate": 6.227164772914878e-06, "loss": 0.801, "step": 12413 }, { "epoch": 0.43848487122994934, "grad_norm": 1.5591654777526855, "learning_rate": 6.226610250748883e-06, "loss": 0.8319, "step": 12414 }, { "epoch": 0.43852019303365725, "grad_norm": 1.6712692975997925, "learning_rate": 6.226055712529246e-06, "loss": 0.8177, "step": 12415 }, { "epoch": 0.43855551483736516, "grad_norm": 1.5637537240982056, "learning_rate": 6.225501158263223e-06, "loss": 0.8095, "step": 12416 }, { "epoch": 0.43859083664107307, "grad_norm": 1.6116605997085571, "learning_rate": 6.22494658795807e-06, "loss": 0.8009, "step": 12417 }, { "epoch": 0.438626158444781, "grad_norm": 1.5124324560165405, "learning_rate": 6.2243920016210456e-06, "loss": 0.7948, "step": 12418 }, { "epoch": 0.4386614802484889, "grad_norm": 1.6932929754257202, "learning_rate": 6.223837399259408e-06, "loss": 0.8307, "step": 12419 }, { "epoch": 0.4386968020521968, "grad_norm": 1.5629913806915283, "learning_rate": 6.223282780880419e-06, "loss": 0.8104, "step": 12420 }, { "epoch": 0.4387321238559047, "grad_norm": 1.0929380655288696, "learning_rate": 6.222728146491333e-06, "loss": 0.5884, "step": 12421 }, { "epoch": 0.4387674456596126, "grad_norm": 1.794378399848938, "learning_rate": 6.222173496099411e-06, "loss": 0.7917, "step": 12422 }, { "epoch": 0.43880276746332053, "grad_norm": 1.6711206436157227, "learning_rate": 6.221618829711913e-06, "loss": 0.8225, "step": 12423 }, { "epoch": 0.43883808926702844, "grad_norm": 1.6469730138778687, "learning_rate": 6.221064147336098e-06, "loss": 0.8086, "step": 12424 }, { "epoch": 0.43887341107073635, "grad_norm": 1.7342320680618286, "learning_rate": 6.220509448979224e-06, "loss": 0.8024, "step": 12425 }, { "epoch": 0.4389087328744442, "grad_norm": 1.6893571615219116, "learning_rate": 6.219954734648552e-06, "loss": 0.7795, "step": 12426 }, { "epoch": 0.4389440546781521, "grad_norm": 1.6641649007797241, "learning_rate": 6.2194000043513405e-06, "loss": 0.8059, "step": 12427 }, { "epoch": 0.43897937648186003, "grad_norm": 1.5907796621322632, "learning_rate": 6.218845258094853e-06, "loss": 0.8078, "step": 12428 }, { "epoch": 0.43901469828556794, "grad_norm": 1.5294387340545654, "learning_rate": 6.2182904958863465e-06, "loss": 0.7835, "step": 12429 }, { "epoch": 0.43905002008927585, "grad_norm": 1.5701730251312256, "learning_rate": 6.217735717733082e-06, "loss": 0.81, "step": 12430 }, { "epoch": 0.43908534189298376, "grad_norm": 1.6147360801696777, "learning_rate": 6.217180923642323e-06, "loss": 0.8208, "step": 12431 }, { "epoch": 0.43912066369669167, "grad_norm": 1.5821977853775024, "learning_rate": 6.216626113621329e-06, "loss": 0.787, "step": 12432 }, { "epoch": 0.4391559855003996, "grad_norm": 1.6910117864608765, "learning_rate": 6.21607128767736e-06, "loss": 0.7677, "step": 12433 }, { "epoch": 0.4391913073041075, "grad_norm": 1.6226747035980225, "learning_rate": 6.215516445817681e-06, "loss": 0.8364, "step": 12434 }, { "epoch": 0.4392266291078154, "grad_norm": 1.5952943563461304, "learning_rate": 6.214961588049549e-06, "loss": 0.8224, "step": 12435 }, { "epoch": 0.4392619509115233, "grad_norm": 1.5437448024749756, "learning_rate": 6.214406714380229e-06, "loss": 0.8164, "step": 12436 }, { "epoch": 0.4392972727152312, "grad_norm": 1.703101396560669, "learning_rate": 6.213851824816982e-06, "loss": 0.8349, "step": 12437 }, { "epoch": 0.43933259451893913, "grad_norm": 1.6955342292785645, "learning_rate": 6.213296919367069e-06, "loss": 0.7782, "step": 12438 }, { "epoch": 0.439367916322647, "grad_norm": 1.8355495929718018, "learning_rate": 6.2127419980377556e-06, "loss": 0.8067, "step": 12439 }, { "epoch": 0.4394032381263549, "grad_norm": 1.678632378578186, "learning_rate": 6.212187060836303e-06, "loss": 0.8056, "step": 12440 }, { "epoch": 0.4394385599300628, "grad_norm": 1.5949240922927856, "learning_rate": 6.211632107769974e-06, "loss": 0.8233, "step": 12441 }, { "epoch": 0.4394738817337707, "grad_norm": 1.7363327741622925, "learning_rate": 6.211077138846031e-06, "loss": 0.8209, "step": 12442 }, { "epoch": 0.43950920353747863, "grad_norm": 1.748576283454895, "learning_rate": 6.210522154071738e-06, "loss": 0.7751, "step": 12443 }, { "epoch": 0.43954452534118654, "grad_norm": 1.6007095575332642, "learning_rate": 6.209967153454361e-06, "loss": 0.8017, "step": 12444 }, { "epoch": 0.43957984714489445, "grad_norm": 1.5600050687789917, "learning_rate": 6.209412137001158e-06, "loss": 0.798, "step": 12445 }, { "epoch": 0.43961516894860236, "grad_norm": 1.665176272392273, "learning_rate": 6.208857104719398e-06, "loss": 0.8016, "step": 12446 }, { "epoch": 0.4396504907523103, "grad_norm": 1.5044957399368286, "learning_rate": 6.208302056616343e-06, "loss": 0.806, "step": 12447 }, { "epoch": 0.4396858125560182, "grad_norm": 1.5139092206954956, "learning_rate": 6.207746992699257e-06, "loss": 0.8091, "step": 12448 }, { "epoch": 0.4397211343597261, "grad_norm": 1.642579197883606, "learning_rate": 6.207191912975407e-06, "loss": 0.8065, "step": 12449 }, { "epoch": 0.439756456163434, "grad_norm": 1.7234420776367188, "learning_rate": 6.2066368174520564e-06, "loss": 0.8308, "step": 12450 }, { "epoch": 0.4397917779671419, "grad_norm": 1.7284409999847412, "learning_rate": 6.206081706136467e-06, "loss": 0.8227, "step": 12451 }, { "epoch": 0.43982709977084977, "grad_norm": 1.7051714658737183, "learning_rate": 6.20552657903591e-06, "loss": 0.8329, "step": 12452 }, { "epoch": 0.4398624215745577, "grad_norm": 1.5478732585906982, "learning_rate": 6.204971436157647e-06, "loss": 0.7696, "step": 12453 }, { "epoch": 0.4398977433782656, "grad_norm": 1.6910876035690308, "learning_rate": 6.204416277508945e-06, "loss": 0.8016, "step": 12454 }, { "epoch": 0.4399330651819735, "grad_norm": 1.7840194702148438, "learning_rate": 6.203861103097068e-06, "loss": 0.8005, "step": 12455 }, { "epoch": 0.4399683869856814, "grad_norm": 1.603237509727478, "learning_rate": 6.203305912929283e-06, "loss": 0.8107, "step": 12456 }, { "epoch": 0.4400037087893893, "grad_norm": 1.5622007846832275, "learning_rate": 6.2027507070128564e-06, "loss": 0.8024, "step": 12457 }, { "epoch": 0.44003903059309724, "grad_norm": 1.593689203262329, "learning_rate": 6.202195485355056e-06, "loss": 0.7808, "step": 12458 }, { "epoch": 0.44007435239680515, "grad_norm": 1.617893099784851, "learning_rate": 6.201640247963145e-06, "loss": 0.7946, "step": 12459 }, { "epoch": 0.44010967420051306, "grad_norm": 1.497269868850708, "learning_rate": 6.201084994844394e-06, "loss": 0.8051, "step": 12460 }, { "epoch": 0.44014499600422097, "grad_norm": 1.632432460784912, "learning_rate": 6.200529726006068e-06, "loss": 0.8288, "step": 12461 }, { "epoch": 0.4401803178079289, "grad_norm": 1.499610424041748, "learning_rate": 6.199974441455435e-06, "loss": 0.8187, "step": 12462 }, { "epoch": 0.4402156396116368, "grad_norm": 1.578543782234192, "learning_rate": 6.199419141199763e-06, "loss": 0.8087, "step": 12463 }, { "epoch": 0.4402509614153447, "grad_norm": 1.8625613451004028, "learning_rate": 6.198863825246316e-06, "loss": 0.8322, "step": 12464 }, { "epoch": 0.44028628321905255, "grad_norm": 1.7085366249084473, "learning_rate": 6.1983084936023664e-06, "loss": 0.8063, "step": 12465 }, { "epoch": 0.44032160502276046, "grad_norm": 1.7690777778625488, "learning_rate": 6.197753146275181e-06, "loss": 0.7909, "step": 12466 }, { "epoch": 0.4403569268264684, "grad_norm": 1.6218763589859009, "learning_rate": 6.197197783272025e-06, "loss": 0.7852, "step": 12467 }, { "epoch": 0.4403922486301763, "grad_norm": 1.5248634815216064, "learning_rate": 6.196642404600171e-06, "loss": 0.8136, "step": 12468 }, { "epoch": 0.4404275704338842, "grad_norm": 1.6779024600982666, "learning_rate": 6.1960870102668865e-06, "loss": 0.8275, "step": 12469 }, { "epoch": 0.4404628922375921, "grad_norm": 1.8849974870681763, "learning_rate": 6.19553160027944e-06, "loss": 0.7686, "step": 12470 }, { "epoch": 0.4404982140413, "grad_norm": 1.5927537679672241, "learning_rate": 6.194976174645101e-06, "loss": 0.7762, "step": 12471 }, { "epoch": 0.44053353584500793, "grad_norm": 1.729225993156433, "learning_rate": 6.194420733371137e-06, "loss": 0.8166, "step": 12472 }, { "epoch": 0.44056885764871584, "grad_norm": 1.5394145250320435, "learning_rate": 6.193865276464821e-06, "loss": 0.7961, "step": 12473 }, { "epoch": 0.44060417945242375, "grad_norm": 1.6608027219772339, "learning_rate": 6.193309803933419e-06, "loss": 0.814, "step": 12474 }, { "epoch": 0.44063950125613166, "grad_norm": 1.886573076248169, "learning_rate": 6.192754315784203e-06, "loss": 0.8316, "step": 12475 }, { "epoch": 0.44067482305983957, "grad_norm": 1.6830805540084839, "learning_rate": 6.192198812024442e-06, "loss": 0.7941, "step": 12476 }, { "epoch": 0.4407101448635475, "grad_norm": 1.7217074632644653, "learning_rate": 6.1916432926614074e-06, "loss": 0.8058, "step": 12477 }, { "epoch": 0.44074546666725534, "grad_norm": 1.850390076637268, "learning_rate": 6.191087757702371e-06, "loss": 0.8216, "step": 12478 }, { "epoch": 0.44078078847096325, "grad_norm": 1.6646679639816284, "learning_rate": 6.190532207154601e-06, "loss": 0.8053, "step": 12479 }, { "epoch": 0.44081611027467116, "grad_norm": 6.057415962219238, "learning_rate": 6.189976641025369e-06, "loss": 0.7662, "step": 12480 }, { "epoch": 0.44085143207837907, "grad_norm": 1.65591299533844, "learning_rate": 6.189421059321947e-06, "loss": 0.7683, "step": 12481 }, { "epoch": 0.440886753882087, "grad_norm": 1.706164002418518, "learning_rate": 6.188865462051605e-06, "loss": 0.7891, "step": 12482 }, { "epoch": 0.4409220756857949, "grad_norm": 1.7531688213348389, "learning_rate": 6.188309849221615e-06, "loss": 0.8153, "step": 12483 }, { "epoch": 0.4409573974895028, "grad_norm": 1.6034680604934692, "learning_rate": 6.18775422083925e-06, "loss": 0.8341, "step": 12484 }, { "epoch": 0.4409927192932107, "grad_norm": 1.6516207456588745, "learning_rate": 6.187198576911781e-06, "loss": 0.7806, "step": 12485 }, { "epoch": 0.4410280410969186, "grad_norm": 1.7366838455200195, "learning_rate": 6.1866429174464795e-06, "loss": 0.8377, "step": 12486 }, { "epoch": 0.44106336290062653, "grad_norm": 1.6339246034622192, "learning_rate": 6.18608724245062e-06, "loss": 0.7917, "step": 12487 }, { "epoch": 0.44109868470433444, "grad_norm": 1.743363857269287, "learning_rate": 6.185531551931472e-06, "loss": 0.8155, "step": 12488 }, { "epoch": 0.44113400650804235, "grad_norm": 1.790761113166809, "learning_rate": 6.18497584589631e-06, "loss": 0.8345, "step": 12489 }, { "epoch": 0.44116932831175026, "grad_norm": 1.5468723773956299, "learning_rate": 6.184420124352408e-06, "loss": 0.8057, "step": 12490 }, { "epoch": 0.4412046501154581, "grad_norm": 1.5473459959030151, "learning_rate": 6.183864387307038e-06, "loss": 0.7964, "step": 12491 }, { "epoch": 0.44123997191916603, "grad_norm": 1.541682243347168, "learning_rate": 6.183308634767473e-06, "loss": 0.8468, "step": 12492 }, { "epoch": 0.44127529372287394, "grad_norm": 1.4971879720687866, "learning_rate": 6.182752866740986e-06, "loss": 0.7826, "step": 12493 }, { "epoch": 0.44131061552658185, "grad_norm": 1.7331252098083496, "learning_rate": 6.182197083234853e-06, "loss": 0.7935, "step": 12494 }, { "epoch": 0.44134593733028976, "grad_norm": 1.652621865272522, "learning_rate": 6.1816412842563466e-06, "loss": 0.8065, "step": 12495 }, { "epoch": 0.44138125913399767, "grad_norm": 1.560935616493225, "learning_rate": 6.181085469812741e-06, "loss": 0.8059, "step": 12496 }, { "epoch": 0.4414165809377056, "grad_norm": 1.658272624015808, "learning_rate": 6.180529639911311e-06, "loss": 0.7874, "step": 12497 }, { "epoch": 0.4414519027414135, "grad_norm": 1.5747387409210205, "learning_rate": 6.179973794559331e-06, "loss": 0.7821, "step": 12498 }, { "epoch": 0.4414872245451214, "grad_norm": 1.577744483947754, "learning_rate": 6.179417933764076e-06, "loss": 0.8066, "step": 12499 }, { "epoch": 0.4415225463488293, "grad_norm": 1.5932985544204712, "learning_rate": 6.17886205753282e-06, "loss": 0.8088, "step": 12500 }, { "epoch": 0.4415578681525372, "grad_norm": 1.6017674207687378, "learning_rate": 6.1783061658728395e-06, "loss": 0.8368, "step": 12501 }, { "epoch": 0.44159318995624514, "grad_norm": 1.560359239578247, "learning_rate": 6.177750258791409e-06, "loss": 0.7749, "step": 12502 }, { "epoch": 0.44162851175995305, "grad_norm": 1.8198000192642212, "learning_rate": 6.1771943362958065e-06, "loss": 0.8, "step": 12503 }, { "epoch": 0.4416638335636609, "grad_norm": 1.5249497890472412, "learning_rate": 6.176638398393303e-06, "loss": 0.8245, "step": 12504 }, { "epoch": 0.4416991553673688, "grad_norm": 1.6584739685058594, "learning_rate": 6.176082445091179e-06, "loss": 0.7732, "step": 12505 }, { "epoch": 0.4417344771710767, "grad_norm": 1.7263407707214355, "learning_rate": 6.175526476396707e-06, "loss": 0.8194, "step": 12506 }, { "epoch": 0.44176979897478463, "grad_norm": 1.686519742012024, "learning_rate": 6.1749704923171675e-06, "loss": 0.8242, "step": 12507 }, { "epoch": 0.44180512077849254, "grad_norm": 1.6520320177078247, "learning_rate": 6.174414492859836e-06, "loss": 0.7841, "step": 12508 }, { "epoch": 0.44184044258220045, "grad_norm": 1.778684139251709, "learning_rate": 6.173858478031986e-06, "loss": 0.8093, "step": 12509 }, { "epoch": 0.44187576438590837, "grad_norm": 1.6751948595046997, "learning_rate": 6.173302447840899e-06, "loss": 0.7704, "step": 12510 }, { "epoch": 0.4419110861896163, "grad_norm": 1.6622003316879272, "learning_rate": 6.172746402293847e-06, "loss": 0.8094, "step": 12511 }, { "epoch": 0.4419464079933242, "grad_norm": 1.5070735216140747, "learning_rate": 6.1721903413981135e-06, "loss": 0.7862, "step": 12512 }, { "epoch": 0.4419817297970321, "grad_norm": 1.6929287910461426, "learning_rate": 6.171634265160973e-06, "loss": 0.8423, "step": 12513 }, { "epoch": 0.44201705160074, "grad_norm": 1.5447285175323486, "learning_rate": 6.171078173589702e-06, "loss": 0.7973, "step": 12514 }, { "epoch": 0.4420523734044479, "grad_norm": 1.748542308807373, "learning_rate": 6.17052206669158e-06, "loss": 0.8132, "step": 12515 }, { "epoch": 0.44208769520815583, "grad_norm": 1.7413053512573242, "learning_rate": 6.1699659444738865e-06, "loss": 0.7974, "step": 12516 }, { "epoch": 0.4421230170118637, "grad_norm": 1.46889066696167, "learning_rate": 6.169409806943898e-06, "loss": 0.784, "step": 12517 }, { "epoch": 0.4421583388155716, "grad_norm": 1.6145102977752686, "learning_rate": 6.168853654108895e-06, "loss": 0.8024, "step": 12518 }, { "epoch": 0.4421936606192795, "grad_norm": 0.9734280109405518, "learning_rate": 6.168297485976152e-06, "loss": 0.6021, "step": 12519 }, { "epoch": 0.4422289824229874, "grad_norm": 1.785826325416565, "learning_rate": 6.1677413025529545e-06, "loss": 0.8236, "step": 12520 }, { "epoch": 0.4422643042266953, "grad_norm": 1.7108350992202759, "learning_rate": 6.167185103846577e-06, "loss": 0.794, "step": 12521 }, { "epoch": 0.44229962603040324, "grad_norm": 1.5406758785247803, "learning_rate": 6.166628889864299e-06, "loss": 0.8156, "step": 12522 }, { "epoch": 0.44233494783411115, "grad_norm": 1.5285353660583496, "learning_rate": 6.166072660613402e-06, "loss": 0.7829, "step": 12523 }, { "epoch": 0.44237026963781906, "grad_norm": 1.5587316751480103, "learning_rate": 6.165516416101167e-06, "loss": 0.8332, "step": 12524 }, { "epoch": 0.44240559144152697, "grad_norm": 1.50222647190094, "learning_rate": 6.16496015633487e-06, "loss": 0.8134, "step": 12525 }, { "epoch": 0.4424409132452349, "grad_norm": 1.5287750959396362, "learning_rate": 6.164403881321794e-06, "loss": 0.7857, "step": 12526 }, { "epoch": 0.4424762350489428, "grad_norm": 1.6565675735473633, "learning_rate": 6.163847591069221e-06, "loss": 0.8401, "step": 12527 }, { "epoch": 0.4425115568526507, "grad_norm": 1.4735161066055298, "learning_rate": 6.163291285584428e-06, "loss": 0.7996, "step": 12528 }, { "epoch": 0.4425468786563586, "grad_norm": 1.615584135055542, "learning_rate": 6.162734964874697e-06, "loss": 0.8116, "step": 12529 }, { "epoch": 0.44258220046006647, "grad_norm": 1.4740089178085327, "learning_rate": 6.162178628947308e-06, "loss": 0.8071, "step": 12530 }, { "epoch": 0.4426175222637744, "grad_norm": 1.5706024169921875, "learning_rate": 6.161622277809546e-06, "loss": 0.8415, "step": 12531 }, { "epoch": 0.4426528440674823, "grad_norm": 1.5400104522705078, "learning_rate": 6.16106591146869e-06, "loss": 0.81, "step": 12532 }, { "epoch": 0.4426881658711902, "grad_norm": 1.8075882196426392, "learning_rate": 6.16050952993202e-06, "loss": 0.7971, "step": 12533 }, { "epoch": 0.4427234876748981, "grad_norm": 1.5806660652160645, "learning_rate": 6.15995313320682e-06, "loss": 0.7831, "step": 12534 }, { "epoch": 0.442758809478606, "grad_norm": 1.6105701923370361, "learning_rate": 6.15939672130037e-06, "loss": 0.8071, "step": 12535 }, { "epoch": 0.44279413128231393, "grad_norm": 1.5633985996246338, "learning_rate": 6.158840294219958e-06, "loss": 0.7929, "step": 12536 }, { "epoch": 0.44282945308602184, "grad_norm": 1.6203687191009521, "learning_rate": 6.158283851972858e-06, "loss": 0.774, "step": 12537 }, { "epoch": 0.44286477488972975, "grad_norm": 1.6376014947891235, "learning_rate": 6.157727394566357e-06, "loss": 0.8031, "step": 12538 }, { "epoch": 0.44290009669343766, "grad_norm": 1.6008018255233765, "learning_rate": 6.1571709220077405e-06, "loss": 0.826, "step": 12539 }, { "epoch": 0.44293541849714557, "grad_norm": 1.769555687904358, "learning_rate": 6.156614434304284e-06, "loss": 0.8207, "step": 12540 }, { "epoch": 0.4429707403008535, "grad_norm": 1.626632571220398, "learning_rate": 6.156057931463278e-06, "loss": 0.8663, "step": 12541 }, { "epoch": 0.4430060621045614, "grad_norm": 1.797247290611267, "learning_rate": 6.155501413492005e-06, "loss": 0.7989, "step": 12542 }, { "epoch": 0.44304138390826925, "grad_norm": 1.697885513305664, "learning_rate": 6.1549448803977416e-06, "loss": 0.8062, "step": 12543 }, { "epoch": 0.44307670571197716, "grad_norm": 1.8825936317443848, "learning_rate": 6.154388332187781e-06, "loss": 0.8058, "step": 12544 }, { "epoch": 0.44311202751568507, "grad_norm": 1.4937846660614014, "learning_rate": 6.153831768869402e-06, "loss": 0.7843, "step": 12545 }, { "epoch": 0.443147349319393, "grad_norm": 1.7088146209716797, "learning_rate": 6.153275190449889e-06, "loss": 0.8215, "step": 12546 }, { "epoch": 0.4431826711231009, "grad_norm": 1.8097959756851196, "learning_rate": 6.152718596936528e-06, "loss": 0.7973, "step": 12547 }, { "epoch": 0.4432179929268088, "grad_norm": 1.5529545545578003, "learning_rate": 6.1521619883366e-06, "loss": 0.7789, "step": 12548 }, { "epoch": 0.4432533147305167, "grad_norm": 1.599176287651062, "learning_rate": 6.151605364657396e-06, "loss": 0.7883, "step": 12549 }, { "epoch": 0.4432886365342246, "grad_norm": 1.7499736547470093, "learning_rate": 6.151048725906195e-06, "loss": 0.8206, "step": 12550 }, { "epoch": 0.44332395833793253, "grad_norm": 1.5508543252944946, "learning_rate": 6.150492072090286e-06, "loss": 0.865, "step": 12551 }, { "epoch": 0.44335928014164044, "grad_norm": 1.8357373476028442, "learning_rate": 6.149935403216952e-06, "loss": 0.7923, "step": 12552 }, { "epoch": 0.44339460194534835, "grad_norm": 1.7181575298309326, "learning_rate": 6.14937871929348e-06, "loss": 0.7619, "step": 12553 }, { "epoch": 0.44342992374905627, "grad_norm": 1.6638137102127075, "learning_rate": 6.1488220203271565e-06, "loss": 0.8091, "step": 12554 }, { "epoch": 0.4434652455527642, "grad_norm": 1.6072559356689453, "learning_rate": 6.148265306325266e-06, "loss": 0.7671, "step": 12555 }, { "epoch": 0.44350056735647203, "grad_norm": 1.5573796033859253, "learning_rate": 6.147708577295094e-06, "loss": 0.7855, "step": 12556 }, { "epoch": 0.44353588916017994, "grad_norm": 1.7702168226242065, "learning_rate": 6.147151833243928e-06, "loss": 0.8326, "step": 12557 }, { "epoch": 0.44357121096388785, "grad_norm": 1.7290539741516113, "learning_rate": 6.146595074179056e-06, "loss": 0.7875, "step": 12558 }, { "epoch": 0.44360653276759576, "grad_norm": 1.5918668508529663, "learning_rate": 6.146038300107761e-06, "loss": 0.7798, "step": 12559 }, { "epoch": 0.4436418545713037, "grad_norm": 1.8581793308258057, "learning_rate": 6.145481511037333e-06, "loss": 0.8114, "step": 12560 }, { "epoch": 0.4436771763750116, "grad_norm": 1.604779839515686, "learning_rate": 6.1449247069750585e-06, "loss": 0.7929, "step": 12561 }, { "epoch": 0.4437124981787195, "grad_norm": 1.5155683755874634, "learning_rate": 6.1443678879282245e-06, "loss": 0.7924, "step": 12562 }, { "epoch": 0.4437478199824274, "grad_norm": 1.7243636846542358, "learning_rate": 6.143811053904119e-06, "loss": 0.8015, "step": 12563 }, { "epoch": 0.4437831417861353, "grad_norm": 1.560591697692871, "learning_rate": 6.14325420491003e-06, "loss": 0.7832, "step": 12564 }, { "epoch": 0.4438184635898432, "grad_norm": 1.7462483644485474, "learning_rate": 6.142697340953244e-06, "loss": 0.8013, "step": 12565 }, { "epoch": 0.44385378539355114, "grad_norm": 1.5964604616165161, "learning_rate": 6.142140462041051e-06, "loss": 0.7919, "step": 12566 }, { "epoch": 0.44388910719725905, "grad_norm": 1.6902154684066772, "learning_rate": 6.141583568180736e-06, "loss": 0.8229, "step": 12567 }, { "epoch": 0.44392442900096696, "grad_norm": 1.6223089694976807, "learning_rate": 6.141026659379592e-06, "loss": 0.8034, "step": 12568 }, { "epoch": 0.4439597508046748, "grad_norm": 1.599509835243225, "learning_rate": 6.140469735644904e-06, "loss": 0.7802, "step": 12569 }, { "epoch": 0.4439950726083827, "grad_norm": 1.6018792390823364, "learning_rate": 6.139912796983963e-06, "loss": 0.8173, "step": 12570 }, { "epoch": 0.44403039441209063, "grad_norm": 1.715118169784546, "learning_rate": 6.139355843404059e-06, "loss": 0.8038, "step": 12571 }, { "epoch": 0.44406571621579854, "grad_norm": 1.639463186264038, "learning_rate": 6.138798874912477e-06, "loss": 0.7903, "step": 12572 }, { "epoch": 0.44410103801950646, "grad_norm": 1.5527572631835938, "learning_rate": 6.1382418915165135e-06, "loss": 0.7786, "step": 12573 }, { "epoch": 0.44413635982321437, "grad_norm": 1.571068525314331, "learning_rate": 6.137684893223451e-06, "loss": 0.813, "step": 12574 }, { "epoch": 0.4441716816269223, "grad_norm": 1.638535976409912, "learning_rate": 6.1371278800405835e-06, "loss": 0.8172, "step": 12575 }, { "epoch": 0.4442070034306302, "grad_norm": 1.5967251062393188, "learning_rate": 6.136570851975201e-06, "loss": 0.8039, "step": 12576 }, { "epoch": 0.4442423252343381, "grad_norm": 1.6363352537155151, "learning_rate": 6.13601380903459e-06, "loss": 0.8392, "step": 12577 }, { "epoch": 0.444277647038046, "grad_norm": 1.6156519651412964, "learning_rate": 6.135456751226046e-06, "loss": 0.7801, "step": 12578 }, { "epoch": 0.4443129688417539, "grad_norm": 1.6456509828567505, "learning_rate": 6.1348996785568575e-06, "loss": 0.8374, "step": 12579 }, { "epoch": 0.44434829064546183, "grad_norm": 1.6798133850097656, "learning_rate": 6.134342591034315e-06, "loss": 0.7861, "step": 12580 }, { "epoch": 0.44438361244916974, "grad_norm": 1.7099233865737915, "learning_rate": 6.133785488665709e-06, "loss": 0.7946, "step": 12581 }, { "epoch": 0.4444189342528776, "grad_norm": 1.7441598176956177, "learning_rate": 6.133228371458333e-06, "loss": 0.7564, "step": 12582 }, { "epoch": 0.4444542560565855, "grad_norm": 1.8836467266082764, "learning_rate": 6.132671239419476e-06, "loss": 0.799, "step": 12583 }, { "epoch": 0.4444895778602934, "grad_norm": 1.6631288528442383, "learning_rate": 6.132114092556433e-06, "loss": 0.7879, "step": 12584 }, { "epoch": 0.4445248996640013, "grad_norm": 1.9457393884658813, "learning_rate": 6.13155693087649e-06, "loss": 0.804, "step": 12585 }, { "epoch": 0.44456022146770924, "grad_norm": 1.498794436454773, "learning_rate": 6.130999754386945e-06, "loss": 0.7918, "step": 12586 }, { "epoch": 0.44459554327141715, "grad_norm": 1.5977758169174194, "learning_rate": 6.130442563095088e-06, "loss": 0.7859, "step": 12587 }, { "epoch": 0.44463086507512506, "grad_norm": 1.7910898923873901, "learning_rate": 6.129885357008209e-06, "loss": 0.7898, "step": 12588 }, { "epoch": 0.44466618687883297, "grad_norm": 1.758482575416565, "learning_rate": 6.129328136133605e-06, "loss": 0.8066, "step": 12589 }, { "epoch": 0.4447015086825409, "grad_norm": 1.718973159790039, "learning_rate": 6.128770900478566e-06, "loss": 0.8274, "step": 12590 }, { "epoch": 0.4447368304862488, "grad_norm": 1.6854008436203003, "learning_rate": 6.128213650050386e-06, "loss": 0.8217, "step": 12591 }, { "epoch": 0.4447721522899567, "grad_norm": 1.8944146633148193, "learning_rate": 6.127656384856358e-06, "loss": 0.8625, "step": 12592 }, { "epoch": 0.4448074740936646, "grad_norm": 1.8635886907577515, "learning_rate": 6.127099104903774e-06, "loss": 0.7705, "step": 12593 }, { "epoch": 0.4448427958973725, "grad_norm": 1.5881025791168213, "learning_rate": 6.12654181019993e-06, "loss": 0.8142, "step": 12594 }, { "epoch": 0.44487811770108043, "grad_norm": 1.593618392944336, "learning_rate": 6.125984500752118e-06, "loss": 0.8014, "step": 12595 }, { "epoch": 0.4449134395047883, "grad_norm": 1.6894645690917969, "learning_rate": 6.125427176567632e-06, "loss": 0.7761, "step": 12596 }, { "epoch": 0.4449487613084962, "grad_norm": 1.7426949739456177, "learning_rate": 6.124869837653767e-06, "loss": 0.8089, "step": 12597 }, { "epoch": 0.4449840831122041, "grad_norm": 1.637097954750061, "learning_rate": 6.124312484017817e-06, "loss": 0.7922, "step": 12598 }, { "epoch": 0.445019404915912, "grad_norm": 1.6836053133010864, "learning_rate": 6.123755115667077e-06, "loss": 0.7927, "step": 12599 }, { "epoch": 0.44505472671961993, "grad_norm": 1.55283522605896, "learning_rate": 6.123197732608842e-06, "loss": 0.7994, "step": 12600 }, { "epoch": 0.44509004852332784, "grad_norm": 1.736808180809021, "learning_rate": 6.122640334850405e-06, "loss": 0.7907, "step": 12601 }, { "epoch": 0.44512537032703575, "grad_norm": 1.6322650909423828, "learning_rate": 6.122082922399063e-06, "loss": 0.8161, "step": 12602 }, { "epoch": 0.44516069213074366, "grad_norm": 1.5936553478240967, "learning_rate": 6.12152549526211e-06, "loss": 0.8368, "step": 12603 }, { "epoch": 0.4451960139344516, "grad_norm": 1.607086181640625, "learning_rate": 6.120968053446842e-06, "loss": 0.8125, "step": 12604 }, { "epoch": 0.4452313357381595, "grad_norm": 1.7340855598449707, "learning_rate": 6.1204105969605555e-06, "loss": 0.7972, "step": 12605 }, { "epoch": 0.4452666575418674, "grad_norm": 1.8780518770217896, "learning_rate": 6.119853125810544e-06, "loss": 0.804, "step": 12606 }, { "epoch": 0.4453019793455753, "grad_norm": 1.6748266220092773, "learning_rate": 6.119295640004107e-06, "loss": 0.8039, "step": 12607 }, { "epoch": 0.4453373011492832, "grad_norm": 1.7220945358276367, "learning_rate": 6.118738139548539e-06, "loss": 0.79, "step": 12608 }, { "epoch": 0.44537262295299107, "grad_norm": 1.7127721309661865, "learning_rate": 6.118180624451137e-06, "loss": 0.7768, "step": 12609 }, { "epoch": 0.445407944756699, "grad_norm": 1.6603357791900635, "learning_rate": 6.117623094719197e-06, "loss": 0.842, "step": 12610 }, { "epoch": 0.4454432665604069, "grad_norm": 1.4547958374023438, "learning_rate": 6.117065550360015e-06, "loss": 0.7686, "step": 12611 }, { "epoch": 0.4454785883641148, "grad_norm": 1.7105069160461426, "learning_rate": 6.11650799138089e-06, "loss": 0.7853, "step": 12612 }, { "epoch": 0.4455139101678227, "grad_norm": 1.7045761346817017, "learning_rate": 6.115950417789118e-06, "loss": 0.8111, "step": 12613 }, { "epoch": 0.4455492319715306, "grad_norm": 1.7113367319107056, "learning_rate": 6.115392829591995e-06, "loss": 0.8058, "step": 12614 }, { "epoch": 0.44558455377523853, "grad_norm": 1.5471779108047485, "learning_rate": 6.1148352267968225e-06, "loss": 0.7942, "step": 12615 }, { "epoch": 0.44561987557894644, "grad_norm": 1.597731113433838, "learning_rate": 6.114277609410897e-06, "loss": 0.7964, "step": 12616 }, { "epoch": 0.44565519738265436, "grad_norm": 1.5921123027801514, "learning_rate": 6.113719977441512e-06, "loss": 0.8186, "step": 12617 }, { "epoch": 0.44569051918636227, "grad_norm": 2.7305080890655518, "learning_rate": 6.113162330895972e-06, "loss": 0.7596, "step": 12618 }, { "epoch": 0.4457258409900702, "grad_norm": 1.7108179330825806, "learning_rate": 6.112604669781572e-06, "loss": 0.8036, "step": 12619 }, { "epoch": 0.4457611627937781, "grad_norm": 1.6402958631515503, "learning_rate": 6.112046994105612e-06, "loss": 0.7888, "step": 12620 }, { "epoch": 0.445796484597486, "grad_norm": 1.690028190612793, "learning_rate": 6.11148930387539e-06, "loss": 0.8398, "step": 12621 }, { "epoch": 0.44583180640119385, "grad_norm": 1.8776521682739258, "learning_rate": 6.1109315990982035e-06, "loss": 0.7941, "step": 12622 }, { "epoch": 0.44586712820490176, "grad_norm": 1.5120717287063599, "learning_rate": 6.110373879781354e-06, "loss": 0.8054, "step": 12623 }, { "epoch": 0.4459024500086097, "grad_norm": 1.687024474143982, "learning_rate": 6.10981614593214e-06, "loss": 0.8483, "step": 12624 }, { "epoch": 0.4459377718123176, "grad_norm": 1.6611534357070923, "learning_rate": 6.10925839755786e-06, "loss": 0.8079, "step": 12625 }, { "epoch": 0.4459730936160255, "grad_norm": 1.6016747951507568, "learning_rate": 6.108700634665815e-06, "loss": 0.8153, "step": 12626 }, { "epoch": 0.4460084154197334, "grad_norm": 1.7290462255477905, "learning_rate": 6.1081428572633035e-06, "loss": 0.7984, "step": 12627 }, { "epoch": 0.4460437372234413, "grad_norm": 1.7007856369018555, "learning_rate": 6.107585065357629e-06, "loss": 0.8059, "step": 12628 }, { "epoch": 0.4460790590271492, "grad_norm": 1.7268133163452148, "learning_rate": 6.10702725895609e-06, "loss": 0.7891, "step": 12629 }, { "epoch": 0.44611438083085714, "grad_norm": 1.7201555967330933, "learning_rate": 6.106469438065983e-06, "loss": 0.7811, "step": 12630 }, { "epoch": 0.44614970263456505, "grad_norm": 1.5438435077667236, "learning_rate": 6.105911602694614e-06, "loss": 0.8142, "step": 12631 }, { "epoch": 0.44618502443827296, "grad_norm": 1.8903788328170776, "learning_rate": 6.105353752849281e-06, "loss": 0.7864, "step": 12632 }, { "epoch": 0.44622034624198087, "grad_norm": 1.8076351881027222, "learning_rate": 6.104795888537286e-06, "loss": 0.7964, "step": 12633 }, { "epoch": 0.4462556680456888, "grad_norm": 1.871559500694275, "learning_rate": 6.1042380097659305e-06, "loss": 0.8111, "step": 12634 }, { "epoch": 0.44629098984939664, "grad_norm": 1.593766212463379, "learning_rate": 6.103680116542514e-06, "loss": 0.7908, "step": 12635 }, { "epoch": 0.44632631165310455, "grad_norm": 1.6458736658096313, "learning_rate": 6.1031222088743414e-06, "loss": 0.778, "step": 12636 }, { "epoch": 0.44636163345681246, "grad_norm": 1.678861141204834, "learning_rate": 6.102564286768713e-06, "loss": 0.8074, "step": 12637 }, { "epoch": 0.44639695526052037, "grad_norm": 1.9193671941757202, "learning_rate": 6.1020063502329295e-06, "loss": 0.7816, "step": 12638 }, { "epoch": 0.4464322770642283, "grad_norm": 1.6565974950790405, "learning_rate": 6.101448399274294e-06, "loss": 0.8177, "step": 12639 }, { "epoch": 0.4464675988679362, "grad_norm": 1.4960870742797852, "learning_rate": 6.1008904339001095e-06, "loss": 0.7771, "step": 12640 }, { "epoch": 0.4465029206716441, "grad_norm": 1.7332382202148438, "learning_rate": 6.1003324541176775e-06, "loss": 0.7987, "step": 12641 }, { "epoch": 0.446538242475352, "grad_norm": 1.5549426078796387, "learning_rate": 6.099774459934302e-06, "loss": 0.8015, "step": 12642 }, { "epoch": 0.4465735642790599, "grad_norm": 1.6298691034317017, "learning_rate": 6.099216451357284e-06, "loss": 0.8121, "step": 12643 }, { "epoch": 0.44660888608276783, "grad_norm": 1.6998252868652344, "learning_rate": 6.098658428393928e-06, "loss": 0.794, "step": 12644 }, { "epoch": 0.44664420788647574, "grad_norm": 1.5580666065216064, "learning_rate": 6.098100391051537e-06, "loss": 0.7812, "step": 12645 }, { "epoch": 0.44667952969018365, "grad_norm": 1.6880297660827637, "learning_rate": 6.097542339337413e-06, "loss": 0.8248, "step": 12646 }, { "epoch": 0.44671485149389156, "grad_norm": 1.665496587753296, "learning_rate": 6.0969842732588645e-06, "loss": 0.7643, "step": 12647 }, { "epoch": 0.4467501732975994, "grad_norm": 1.5762847661972046, "learning_rate": 6.096426192823191e-06, "loss": 0.7883, "step": 12648 }, { "epoch": 0.44678549510130733, "grad_norm": 1.7571955919265747, "learning_rate": 6.095868098037696e-06, "loss": 0.8038, "step": 12649 }, { "epoch": 0.44682081690501524, "grad_norm": 1.633223295211792, "learning_rate": 6.095309988909686e-06, "loss": 0.7984, "step": 12650 }, { "epoch": 0.44685613870872315, "grad_norm": 1.750768780708313, "learning_rate": 6.094751865446464e-06, "loss": 0.798, "step": 12651 }, { "epoch": 0.44689146051243106, "grad_norm": 0.9676448106765747, "learning_rate": 6.094193727655337e-06, "loss": 0.5919, "step": 12652 }, { "epoch": 0.44692678231613897, "grad_norm": 2.444268226623535, "learning_rate": 6.093635575543608e-06, "loss": 0.812, "step": 12653 }, { "epoch": 0.4469621041198469, "grad_norm": 1.7120563983917236, "learning_rate": 6.093077409118581e-06, "loss": 0.8016, "step": 12654 }, { "epoch": 0.4469974259235548, "grad_norm": 1.9221675395965576, "learning_rate": 6.092519228387564e-06, "loss": 0.7971, "step": 12655 }, { "epoch": 0.4470327477272627, "grad_norm": 1.5858114957809448, "learning_rate": 6.091961033357859e-06, "loss": 0.7824, "step": 12656 }, { "epoch": 0.4470680695309706, "grad_norm": 1.778114676475525, "learning_rate": 6.091402824036775e-06, "loss": 0.8583, "step": 12657 }, { "epoch": 0.4471033913346785, "grad_norm": 1.7770907878875732, "learning_rate": 6.090844600431616e-06, "loss": 0.802, "step": 12658 }, { "epoch": 0.44713871313838643, "grad_norm": 1.5158107280731201, "learning_rate": 6.090286362549686e-06, "loss": 0.7716, "step": 12659 }, { "epoch": 0.44717403494209435, "grad_norm": 1.7852908372879028, "learning_rate": 6.089728110398294e-06, "loss": 0.844, "step": 12660 }, { "epoch": 0.4472093567458022, "grad_norm": 1.4306021928787231, "learning_rate": 6.0891698439847445e-06, "loss": 0.7767, "step": 12661 }, { "epoch": 0.4472446785495101, "grad_norm": 1.6035090684890747, "learning_rate": 6.088611563316346e-06, "loss": 0.8263, "step": 12662 }, { "epoch": 0.447280000353218, "grad_norm": 1.9610999822616577, "learning_rate": 6.088053268400403e-06, "loss": 0.8151, "step": 12663 }, { "epoch": 0.44731532215692593, "grad_norm": 1.832822322845459, "learning_rate": 6.087494959244223e-06, "loss": 0.8086, "step": 12664 }, { "epoch": 0.44735064396063384, "grad_norm": 1.5947712659835815, "learning_rate": 6.086936635855116e-06, "loss": 0.8085, "step": 12665 }, { "epoch": 0.44738596576434175, "grad_norm": 1.6639493703842163, "learning_rate": 6.086378298240385e-06, "loss": 0.8286, "step": 12666 }, { "epoch": 0.44742128756804966, "grad_norm": 1.6430777311325073, "learning_rate": 6.0858199464073385e-06, "loss": 0.8194, "step": 12667 }, { "epoch": 0.4474566093717576, "grad_norm": 1.6112253665924072, "learning_rate": 6.0852615803632865e-06, "loss": 0.7549, "step": 12668 }, { "epoch": 0.4474919311754655, "grad_norm": 1.628059983253479, "learning_rate": 6.084703200115533e-06, "loss": 0.7885, "step": 12669 }, { "epoch": 0.4475272529791734, "grad_norm": 1.9700336456298828, "learning_rate": 6.0841448056713885e-06, "loss": 0.7944, "step": 12670 }, { "epoch": 0.4475625747828813, "grad_norm": 1.7434401512145996, "learning_rate": 6.0835863970381616e-06, "loss": 0.8148, "step": 12671 }, { "epoch": 0.4475978965865892, "grad_norm": 1.7571065425872803, "learning_rate": 6.083027974223157e-06, "loss": 0.8006, "step": 12672 }, { "epoch": 0.4476332183902971, "grad_norm": 1.681849718093872, "learning_rate": 6.0824695372336885e-06, "loss": 0.809, "step": 12673 }, { "epoch": 0.447668540194005, "grad_norm": 2.2068073749542236, "learning_rate": 6.081911086077061e-06, "loss": 0.8527, "step": 12674 }, { "epoch": 0.4477038619977129, "grad_norm": 1.7631161212921143, "learning_rate": 6.081352620760585e-06, "loss": 0.8023, "step": 12675 }, { "epoch": 0.4477391838014208, "grad_norm": 2.0536680221557617, "learning_rate": 6.080794141291569e-06, "loss": 0.8306, "step": 12676 }, { "epoch": 0.4477745056051287, "grad_norm": 1.5712840557098389, "learning_rate": 6.0802356476773215e-06, "loss": 0.8045, "step": 12677 }, { "epoch": 0.4478098274088366, "grad_norm": 1.7283331155776978, "learning_rate": 6.079677139925155e-06, "loss": 0.8401, "step": 12678 }, { "epoch": 0.44784514921254454, "grad_norm": 1.698144793510437, "learning_rate": 6.079118618042376e-06, "loss": 0.7916, "step": 12679 }, { "epoch": 0.44788047101625245, "grad_norm": 1.9327383041381836, "learning_rate": 6.078560082036294e-06, "loss": 0.8046, "step": 12680 }, { "epoch": 0.44791579281996036, "grad_norm": 1.8969041109085083, "learning_rate": 6.078001531914223e-06, "loss": 0.7869, "step": 12681 }, { "epoch": 0.44795111462366827, "grad_norm": 1.9019120931625366, "learning_rate": 6.077442967683469e-06, "loss": 0.7923, "step": 12682 }, { "epoch": 0.4479864364273762, "grad_norm": 1.8436888456344604, "learning_rate": 6.076884389351345e-06, "loss": 0.7923, "step": 12683 }, { "epoch": 0.4480217582310841, "grad_norm": 1.5801856517791748, "learning_rate": 6.07632579692516e-06, "loss": 0.8034, "step": 12684 }, { "epoch": 0.448057080034792, "grad_norm": 1.8412251472473145, "learning_rate": 6.075767190412224e-06, "loss": 0.7803, "step": 12685 }, { "epoch": 0.4480924018384999, "grad_norm": 1.654099464416504, "learning_rate": 6.075208569819851e-06, "loss": 0.8491, "step": 12686 }, { "epoch": 0.44812772364220776, "grad_norm": 1.5986965894699097, "learning_rate": 6.07464993515535e-06, "loss": 0.7898, "step": 12687 }, { "epoch": 0.4481630454459157, "grad_norm": 1.6062146425247192, "learning_rate": 6.074091286426031e-06, "loss": 0.8087, "step": 12688 }, { "epoch": 0.4481983672496236, "grad_norm": 1.5630377531051636, "learning_rate": 6.073532623639209e-06, "loss": 0.8132, "step": 12689 }, { "epoch": 0.4482336890533315, "grad_norm": 1.5490756034851074, "learning_rate": 6.072973946802192e-06, "loss": 0.7879, "step": 12690 }, { "epoch": 0.4482690108570394, "grad_norm": 1.657716155052185, "learning_rate": 6.072415255922296e-06, "loss": 0.818, "step": 12691 }, { "epoch": 0.4483043326607473, "grad_norm": 1.6442375183105469, "learning_rate": 6.0718565510068295e-06, "loss": 0.8089, "step": 12692 }, { "epoch": 0.44833965446445523, "grad_norm": 1.815714955329895, "learning_rate": 6.071297832063107e-06, "loss": 0.7902, "step": 12693 }, { "epoch": 0.44837497626816314, "grad_norm": 1.5764890909194946, "learning_rate": 6.0707390990984394e-06, "loss": 0.8179, "step": 12694 }, { "epoch": 0.44841029807187105, "grad_norm": 1.9011955261230469, "learning_rate": 6.070180352120139e-06, "loss": 0.8194, "step": 12695 }, { "epoch": 0.44844561987557896, "grad_norm": 1.6330785751342773, "learning_rate": 6.06962159113552e-06, "loss": 0.7961, "step": 12696 }, { "epoch": 0.44848094167928687, "grad_norm": 1.619035005569458, "learning_rate": 6.069062816151894e-06, "loss": 0.8414, "step": 12697 }, { "epoch": 0.4485162634829948, "grad_norm": 1.60988187789917, "learning_rate": 6.068504027176574e-06, "loss": 0.7913, "step": 12698 }, { "epoch": 0.4485515852867027, "grad_norm": 1.6291829347610474, "learning_rate": 6.067945224216876e-06, "loss": 0.7988, "step": 12699 }, { "epoch": 0.44858690709041055, "grad_norm": 1.5525357723236084, "learning_rate": 6.067386407280111e-06, "loss": 0.8255, "step": 12700 }, { "epoch": 0.44862222889411846, "grad_norm": 1.6037379503250122, "learning_rate": 6.0668275763735925e-06, "loss": 0.7984, "step": 12701 }, { "epoch": 0.44865755069782637, "grad_norm": 1.949306845664978, "learning_rate": 6.066268731504638e-06, "loss": 0.8083, "step": 12702 }, { "epoch": 0.4486928725015343, "grad_norm": 1.8169807195663452, "learning_rate": 6.065709872680555e-06, "loss": 0.8115, "step": 12703 }, { "epoch": 0.4487281943052422, "grad_norm": 1.813567042350769, "learning_rate": 6.0651509999086645e-06, "loss": 0.7984, "step": 12704 }, { "epoch": 0.4487635161089501, "grad_norm": 2.0356297492980957, "learning_rate": 6.064592113196277e-06, "loss": 0.813, "step": 12705 }, { "epoch": 0.448798837912658, "grad_norm": 1.564200758934021, "learning_rate": 6.0640332125507074e-06, "loss": 0.8105, "step": 12706 }, { "epoch": 0.4488341597163659, "grad_norm": 1.6543090343475342, "learning_rate": 6.063474297979272e-06, "loss": 0.8275, "step": 12707 }, { "epoch": 0.44886948152007383, "grad_norm": 1.6518707275390625, "learning_rate": 6.062915369489284e-06, "loss": 0.7835, "step": 12708 }, { "epoch": 0.44890480332378174, "grad_norm": 1.6322269439697266, "learning_rate": 6.062356427088059e-06, "loss": 0.7868, "step": 12709 }, { "epoch": 0.44894012512748965, "grad_norm": 1.6203960180282593, "learning_rate": 6.061797470782914e-06, "loss": 0.8012, "step": 12710 }, { "epoch": 0.44897544693119756, "grad_norm": 1.5892095565795898, "learning_rate": 6.061238500581164e-06, "loss": 0.8124, "step": 12711 }, { "epoch": 0.4490107687349055, "grad_norm": 1.8505074977874756, "learning_rate": 6.060679516490122e-06, "loss": 0.8271, "step": 12712 }, { "epoch": 0.44904609053861333, "grad_norm": 1.5786744356155396, "learning_rate": 6.060120518517108e-06, "loss": 0.8324, "step": 12713 }, { "epoch": 0.44908141234232124, "grad_norm": 1.882359266281128, "learning_rate": 6.059561506669433e-06, "loss": 0.8215, "step": 12714 }, { "epoch": 0.44911673414602915, "grad_norm": 1.5825309753417969, "learning_rate": 6.059002480954419e-06, "loss": 0.8185, "step": 12715 }, { "epoch": 0.44915205594973706, "grad_norm": 1.7239913940429688, "learning_rate": 6.058443441379379e-06, "loss": 0.7989, "step": 12716 }, { "epoch": 0.44918737775344497, "grad_norm": 1.5558195114135742, "learning_rate": 6.057884387951629e-06, "loss": 0.8182, "step": 12717 }, { "epoch": 0.4492226995571529, "grad_norm": 1.594072937965393, "learning_rate": 6.057325320678489e-06, "loss": 0.7874, "step": 12718 }, { "epoch": 0.4492580213608608, "grad_norm": 1.8492166996002197, "learning_rate": 6.056766239567271e-06, "loss": 0.783, "step": 12719 }, { "epoch": 0.4492933431645687, "grad_norm": 1.6679654121398926, "learning_rate": 6.056207144625299e-06, "loss": 0.8433, "step": 12720 }, { "epoch": 0.4493286649682766, "grad_norm": 1.691506028175354, "learning_rate": 6.055648035859885e-06, "loss": 0.7841, "step": 12721 }, { "epoch": 0.4493639867719845, "grad_norm": 1.6413542032241821, "learning_rate": 6.055088913278345e-06, "loss": 0.8053, "step": 12722 }, { "epoch": 0.44939930857569244, "grad_norm": 1.5656323432922363, "learning_rate": 6.054529776888003e-06, "loss": 0.7836, "step": 12723 }, { "epoch": 0.44943463037940035, "grad_norm": 1.5550012588500977, "learning_rate": 6.05397062669617e-06, "loss": 0.8032, "step": 12724 }, { "epoch": 0.44946995218310826, "grad_norm": 1.8011431694030762, "learning_rate": 6.053411462710172e-06, "loss": 0.7845, "step": 12725 }, { "epoch": 0.4495052739868161, "grad_norm": 1.6253623962402344, "learning_rate": 6.052852284937321e-06, "loss": 0.785, "step": 12726 }, { "epoch": 0.449540595790524, "grad_norm": 1.788028359413147, "learning_rate": 6.052293093384936e-06, "loss": 0.8013, "step": 12727 }, { "epoch": 0.44957591759423193, "grad_norm": 1.6089857816696167, "learning_rate": 6.0517338880603385e-06, "loss": 0.824, "step": 12728 }, { "epoch": 0.44961123939793984, "grad_norm": 2.4574015140533447, "learning_rate": 6.0511746689708435e-06, "loss": 0.8258, "step": 12729 }, { "epoch": 0.44964656120164775, "grad_norm": 1.9664785861968994, "learning_rate": 6.050615436123774e-06, "loss": 0.8128, "step": 12730 }, { "epoch": 0.44968188300535566, "grad_norm": 1.8473280668258667, "learning_rate": 6.050056189526447e-06, "loss": 0.8181, "step": 12731 }, { "epoch": 0.4497172048090636, "grad_norm": 1.624617338180542, "learning_rate": 6.049496929186181e-06, "loss": 0.8453, "step": 12732 }, { "epoch": 0.4497525266127715, "grad_norm": 1.5944331884384155, "learning_rate": 6.048937655110298e-06, "loss": 0.7887, "step": 12733 }, { "epoch": 0.4497878484164794, "grad_norm": 1.7288376092910767, "learning_rate": 6.048378367306115e-06, "loss": 0.7972, "step": 12734 }, { "epoch": 0.4498231702201873, "grad_norm": 1.6996461153030396, "learning_rate": 6.0478190657809525e-06, "loss": 0.8144, "step": 12735 }, { "epoch": 0.4498584920238952, "grad_norm": 0.9848840832710266, "learning_rate": 6.047259750542131e-06, "loss": 0.5946, "step": 12736 }, { "epoch": 0.44989381382760313, "grad_norm": 1.664084553718567, "learning_rate": 6.0467004215969725e-06, "loss": 0.7652, "step": 12737 }, { "epoch": 0.44992913563131104, "grad_norm": 1.5765506029129028, "learning_rate": 6.046141078952793e-06, "loss": 0.7911, "step": 12738 }, { "epoch": 0.4499644574350189, "grad_norm": 1.7545942068099976, "learning_rate": 6.045581722616919e-06, "loss": 0.8129, "step": 12739 }, { "epoch": 0.4499997792387268, "grad_norm": 1.6466609239578247, "learning_rate": 6.045022352596666e-06, "loss": 0.8043, "step": 12740 }, { "epoch": 0.4500351010424347, "grad_norm": 1.7983639240264893, "learning_rate": 6.044462968899357e-06, "loss": 0.8179, "step": 12741 }, { "epoch": 0.4500704228461426, "grad_norm": 1.5174283981323242, "learning_rate": 6.043903571532314e-06, "loss": 0.7746, "step": 12742 }, { "epoch": 0.45010574464985054, "grad_norm": 1.5164302587509155, "learning_rate": 6.043344160502855e-06, "loss": 0.7702, "step": 12743 }, { "epoch": 0.45014106645355845, "grad_norm": 1.6599493026733398, "learning_rate": 6.042784735818306e-06, "loss": 0.8043, "step": 12744 }, { "epoch": 0.45017638825726636, "grad_norm": 1.7209997177124023, "learning_rate": 6.042225297485985e-06, "loss": 0.8069, "step": 12745 }, { "epoch": 0.45021171006097427, "grad_norm": 1.6461832523345947, "learning_rate": 6.0416658455132145e-06, "loss": 0.832, "step": 12746 }, { "epoch": 0.4502470318646822, "grad_norm": 1.6232984066009521, "learning_rate": 6.04110637990732e-06, "loss": 0.7744, "step": 12747 }, { "epoch": 0.4502823536683901, "grad_norm": 1.7156846523284912, "learning_rate": 6.040546900675619e-06, "loss": 0.8163, "step": 12748 }, { "epoch": 0.450317675472098, "grad_norm": 1.826207160949707, "learning_rate": 6.039987407825437e-06, "loss": 0.8092, "step": 12749 }, { "epoch": 0.4503529972758059, "grad_norm": 1.7910959720611572, "learning_rate": 6.039427901364094e-06, "loss": 0.8084, "step": 12750 }, { "epoch": 0.4503883190795138, "grad_norm": 1.6992491483688354, "learning_rate": 6.038868381298915e-06, "loss": 0.8199, "step": 12751 }, { "epoch": 0.4504236408832217, "grad_norm": 1.620418667793274, "learning_rate": 6.038308847637221e-06, "loss": 0.7808, "step": 12752 }, { "epoch": 0.4504589626869296, "grad_norm": 1.6309611797332764, "learning_rate": 6.0377493003863355e-06, "loss": 0.769, "step": 12753 }, { "epoch": 0.4504942844906375, "grad_norm": 1.540169358253479, "learning_rate": 6.037189739553584e-06, "loss": 0.8185, "step": 12754 }, { "epoch": 0.4505296062943454, "grad_norm": 1.6876442432403564, "learning_rate": 6.036630165146287e-06, "loss": 0.7979, "step": 12755 }, { "epoch": 0.4505649280980533, "grad_norm": 1.4928662776947021, "learning_rate": 6.036070577171769e-06, "loss": 0.808, "step": 12756 }, { "epoch": 0.45060024990176123, "grad_norm": 1.6637033224105835, "learning_rate": 6.035510975637356e-06, "loss": 0.8091, "step": 12757 }, { "epoch": 0.45063557170546914, "grad_norm": 1.6520960330963135, "learning_rate": 6.034951360550368e-06, "loss": 0.806, "step": 12758 }, { "epoch": 0.45067089350917705, "grad_norm": 1.5768946409225464, "learning_rate": 6.034391731918132e-06, "loss": 0.7804, "step": 12759 }, { "epoch": 0.45070621531288496, "grad_norm": 1.617049217224121, "learning_rate": 6.033832089747971e-06, "loss": 0.7885, "step": 12760 }, { "epoch": 0.45074153711659287, "grad_norm": 1.7071518898010254, "learning_rate": 6.0332724340472094e-06, "loss": 0.8221, "step": 12761 }, { "epoch": 0.4507768589203008, "grad_norm": 1.589464545249939, "learning_rate": 6.0327127648231746e-06, "loss": 0.8352, "step": 12762 }, { "epoch": 0.4508121807240087, "grad_norm": 1.8944863080978394, "learning_rate": 6.032153082083187e-06, "loss": 0.8114, "step": 12763 }, { "epoch": 0.4508475025277166, "grad_norm": 1.6918656826019287, "learning_rate": 6.031593385834574e-06, "loss": 0.8002, "step": 12764 }, { "epoch": 0.45088282433142446, "grad_norm": 1.722446084022522, "learning_rate": 6.031033676084662e-06, "loss": 0.8322, "step": 12765 }, { "epoch": 0.45091814613513237, "grad_norm": 1.5357060432434082, "learning_rate": 6.030473952840774e-06, "loss": 0.7995, "step": 12766 }, { "epoch": 0.4509534679388403, "grad_norm": 1.644045114517212, "learning_rate": 6.0299142161102375e-06, "loss": 0.7996, "step": 12767 }, { "epoch": 0.4509887897425482, "grad_norm": 1.6803250312805176, "learning_rate": 6.029354465900378e-06, "loss": 0.8542, "step": 12768 }, { "epoch": 0.4510241115462561, "grad_norm": 1.7927180528640747, "learning_rate": 6.028794702218519e-06, "loss": 0.8196, "step": 12769 }, { "epoch": 0.451059433349964, "grad_norm": 1.522316336631775, "learning_rate": 6.028234925071988e-06, "loss": 0.8107, "step": 12770 }, { "epoch": 0.4510947551536719, "grad_norm": 0.9504936337471008, "learning_rate": 6.027675134468114e-06, "loss": 0.5919, "step": 12771 }, { "epoch": 0.45113007695737983, "grad_norm": 1.8502206802368164, "learning_rate": 6.027115330414218e-06, "loss": 0.8064, "step": 12772 }, { "epoch": 0.45116539876108774, "grad_norm": 1.5673459768295288, "learning_rate": 6.026555512917632e-06, "loss": 0.8059, "step": 12773 }, { "epoch": 0.45120072056479565, "grad_norm": 1.7550734281539917, "learning_rate": 6.02599568198568e-06, "loss": 0.7906, "step": 12774 }, { "epoch": 0.45123604236850356, "grad_norm": 1.6107336282730103, "learning_rate": 6.025435837625688e-06, "loss": 0.7931, "step": 12775 }, { "epoch": 0.4512713641722115, "grad_norm": 1.726324200630188, "learning_rate": 6.024875979844987e-06, "loss": 0.769, "step": 12776 }, { "epoch": 0.4513066859759194, "grad_norm": 0.964536190032959, "learning_rate": 6.0243161086508996e-06, "loss": 0.5999, "step": 12777 }, { "epoch": 0.45134200777962724, "grad_norm": 1.770532250404358, "learning_rate": 6.023756224050756e-06, "loss": 0.7878, "step": 12778 }, { "epoch": 0.45137732958333515, "grad_norm": 1.5774040222167969, "learning_rate": 6.023196326051885e-06, "loss": 0.7922, "step": 12779 }, { "epoch": 0.45141265138704306, "grad_norm": 1.733074426651001, "learning_rate": 6.022636414661611e-06, "loss": 0.8088, "step": 12780 }, { "epoch": 0.451447973190751, "grad_norm": 1.6615018844604492, "learning_rate": 6.0220764898872645e-06, "loss": 0.7873, "step": 12781 }, { "epoch": 0.4514832949944589, "grad_norm": 1.6833126544952393, "learning_rate": 6.021516551736172e-06, "loss": 0.8171, "step": 12782 }, { "epoch": 0.4515186167981668, "grad_norm": 1.7546753883361816, "learning_rate": 6.020956600215664e-06, "loss": 0.853, "step": 12783 }, { "epoch": 0.4515539386018747, "grad_norm": 1.7683093547821045, "learning_rate": 6.0203966353330675e-06, "loss": 0.799, "step": 12784 }, { "epoch": 0.4515892604055826, "grad_norm": 1.5217111110687256, "learning_rate": 6.019836657095712e-06, "loss": 0.8262, "step": 12785 }, { "epoch": 0.4516245822092905, "grad_norm": 1.5804026126861572, "learning_rate": 6.0192766655109265e-06, "loss": 0.7678, "step": 12786 }, { "epoch": 0.45165990401299844, "grad_norm": 1.8671401739120483, "learning_rate": 6.018716660586038e-06, "loss": 0.7939, "step": 12787 }, { "epoch": 0.45169522581670635, "grad_norm": 1.7958306074142456, "learning_rate": 6.018156642328379e-06, "loss": 0.8264, "step": 12788 }, { "epoch": 0.45173054762041426, "grad_norm": 1.6484875679016113, "learning_rate": 6.017596610745278e-06, "loss": 0.847, "step": 12789 }, { "epoch": 0.45176586942412217, "grad_norm": 1.707556962966919, "learning_rate": 6.017036565844061e-06, "loss": 0.8105, "step": 12790 }, { "epoch": 0.45180119122783, "grad_norm": 1.524304986000061, "learning_rate": 6.016476507632062e-06, "loss": 0.8095, "step": 12791 }, { "epoch": 0.45183651303153793, "grad_norm": 1.559319257736206, "learning_rate": 6.01591643611661e-06, "loss": 0.7937, "step": 12792 }, { "epoch": 0.45187183483524584, "grad_norm": 1.773546814918518, "learning_rate": 6.015356351305034e-06, "loss": 0.8631, "step": 12793 }, { "epoch": 0.45190715663895376, "grad_norm": 1.8031355142593384, "learning_rate": 6.014796253204666e-06, "loss": 0.8096, "step": 12794 }, { "epoch": 0.45194247844266167, "grad_norm": 1.5580246448516846, "learning_rate": 6.014236141822834e-06, "loss": 0.7794, "step": 12795 }, { "epoch": 0.4519778002463696, "grad_norm": 1.6276185512542725, "learning_rate": 6.013676017166871e-06, "loss": 0.814, "step": 12796 }, { "epoch": 0.4520131220500775, "grad_norm": 1.6140047311782837, "learning_rate": 6.013115879244107e-06, "loss": 0.7795, "step": 12797 }, { "epoch": 0.4520484438537854, "grad_norm": 1.8396499156951904, "learning_rate": 6.012555728061871e-06, "loss": 0.7993, "step": 12798 }, { "epoch": 0.4520837656574933, "grad_norm": 1.698997139930725, "learning_rate": 6.0119955636274976e-06, "loss": 0.7975, "step": 12799 }, { "epoch": 0.4521190874612012, "grad_norm": 1.8183857202529907, "learning_rate": 6.011435385948317e-06, "loss": 0.8041, "step": 12800 }, { "epoch": 0.45215440926490913, "grad_norm": 1.6904734373092651, "learning_rate": 6.010875195031658e-06, "loss": 0.8165, "step": 12801 }, { "epoch": 0.45218973106861704, "grad_norm": 1.9399244785308838, "learning_rate": 6.010314990884856e-06, "loss": 0.825, "step": 12802 }, { "epoch": 0.45222505287232495, "grad_norm": 1.6365410089492798, "learning_rate": 6.0097547735152414e-06, "loss": 0.785, "step": 12803 }, { "epoch": 0.4522603746760328, "grad_norm": 1.7420763969421387, "learning_rate": 6.009194542930147e-06, "loss": 0.8268, "step": 12804 }, { "epoch": 0.4522956964797407, "grad_norm": 1.7732378244400024, "learning_rate": 6.008634299136903e-06, "loss": 0.8283, "step": 12805 }, { "epoch": 0.4523310182834486, "grad_norm": 1.8609846830368042, "learning_rate": 6.008074042142842e-06, "loss": 0.7949, "step": 12806 }, { "epoch": 0.45236634008715654, "grad_norm": 1.634719967842102, "learning_rate": 6.007513771955299e-06, "loss": 0.8064, "step": 12807 }, { "epoch": 0.45240166189086445, "grad_norm": 1.666100263595581, "learning_rate": 6.006953488581604e-06, "loss": 0.794, "step": 12808 }, { "epoch": 0.45243698369457236, "grad_norm": 1.7640552520751953, "learning_rate": 6.006393192029091e-06, "loss": 0.7707, "step": 12809 }, { "epoch": 0.45247230549828027, "grad_norm": 1.572457194328308, "learning_rate": 6.0058328823050934e-06, "loss": 0.8177, "step": 12810 }, { "epoch": 0.4525076273019882, "grad_norm": 1.6375632286071777, "learning_rate": 6.0052725594169435e-06, "loss": 0.7932, "step": 12811 }, { "epoch": 0.4525429491056961, "grad_norm": 1.6103911399841309, "learning_rate": 6.0047122233719765e-06, "loss": 0.7872, "step": 12812 }, { "epoch": 0.452578270909404, "grad_norm": 1.6221777200698853, "learning_rate": 6.004151874177526e-06, "loss": 0.7875, "step": 12813 }, { "epoch": 0.4526135927131119, "grad_norm": 2.3151211738586426, "learning_rate": 6.003591511840923e-06, "loss": 0.8147, "step": 12814 }, { "epoch": 0.4526489145168198, "grad_norm": 1.6772352457046509, "learning_rate": 6.003031136369502e-06, "loss": 0.7576, "step": 12815 }, { "epoch": 0.45268423632052773, "grad_norm": 1.8260730504989624, "learning_rate": 6.002470747770599e-06, "loss": 0.8019, "step": 12816 }, { "epoch": 0.4527195581242356, "grad_norm": 1.5889604091644287, "learning_rate": 6.001910346051547e-06, "loss": 0.802, "step": 12817 }, { "epoch": 0.4527548799279435, "grad_norm": 1.8468269109725952, "learning_rate": 6.001349931219682e-06, "loss": 0.8063, "step": 12818 }, { "epoch": 0.4527902017316514, "grad_norm": 1.9798301458358765, "learning_rate": 6.000789503282335e-06, "loss": 0.7831, "step": 12819 }, { "epoch": 0.4528255235353593, "grad_norm": 1.6371815204620361, "learning_rate": 6.000229062246845e-06, "loss": 0.8425, "step": 12820 }, { "epoch": 0.45286084533906723, "grad_norm": 1.7457374334335327, "learning_rate": 5.999668608120545e-06, "loss": 0.833, "step": 12821 }, { "epoch": 0.45289616714277514, "grad_norm": 1.781798243522644, "learning_rate": 5.99910814091077e-06, "loss": 0.8197, "step": 12822 }, { "epoch": 0.45293148894648305, "grad_norm": 1.667760968208313, "learning_rate": 5.998547660624855e-06, "loss": 0.7895, "step": 12823 }, { "epoch": 0.45296681075019096, "grad_norm": 1.636670470237732, "learning_rate": 5.9979871672701355e-06, "loss": 0.8069, "step": 12824 }, { "epoch": 0.4530021325538989, "grad_norm": 1.6476885080337524, "learning_rate": 5.997426660853947e-06, "loss": 0.7719, "step": 12825 }, { "epoch": 0.4530374543576068, "grad_norm": 1.5508267879486084, "learning_rate": 5.996866141383628e-06, "loss": 0.7811, "step": 12826 }, { "epoch": 0.4530727761613147, "grad_norm": 1.5069996118545532, "learning_rate": 5.996305608866511e-06, "loss": 0.8015, "step": 12827 }, { "epoch": 0.4531080979650226, "grad_norm": 1.6482152938842773, "learning_rate": 5.995745063309933e-06, "loss": 0.7955, "step": 12828 }, { "epoch": 0.4531434197687305, "grad_norm": 1.7075905799865723, "learning_rate": 5.995184504721232e-06, "loss": 0.8209, "step": 12829 }, { "epoch": 0.45317874157243837, "grad_norm": 1.80428946018219, "learning_rate": 5.994623933107743e-06, "loss": 0.7929, "step": 12830 }, { "epoch": 0.4532140633761463, "grad_norm": 1.931929111480713, "learning_rate": 5.994063348476804e-06, "loss": 0.798, "step": 12831 }, { "epoch": 0.4532493851798542, "grad_norm": 1.458911418914795, "learning_rate": 5.993502750835748e-06, "loss": 0.7915, "step": 12832 }, { "epoch": 0.4532847069835621, "grad_norm": 1.4654194116592407, "learning_rate": 5.992942140191916e-06, "loss": 0.7922, "step": 12833 }, { "epoch": 0.45332002878727, "grad_norm": 1.596631407737732, "learning_rate": 5.992381516552646e-06, "loss": 0.7836, "step": 12834 }, { "epoch": 0.4533553505909779, "grad_norm": 1.7811230421066284, "learning_rate": 5.99182087992527e-06, "loss": 0.7998, "step": 12835 }, { "epoch": 0.45339067239468583, "grad_norm": 1.7704216241836548, "learning_rate": 5.9912602303171305e-06, "loss": 0.8007, "step": 12836 }, { "epoch": 0.45342599419839374, "grad_norm": 1.734554409980774, "learning_rate": 5.9906995677355625e-06, "loss": 0.8201, "step": 12837 }, { "epoch": 0.45346131600210166, "grad_norm": 1.8406670093536377, "learning_rate": 5.9901388921879055e-06, "loss": 0.7598, "step": 12838 }, { "epoch": 0.45349663780580957, "grad_norm": 1.6009960174560547, "learning_rate": 5.9895782036814965e-06, "loss": 0.808, "step": 12839 }, { "epoch": 0.4535319596095175, "grad_norm": 1.5542113780975342, "learning_rate": 5.989017502223675e-06, "loss": 0.7631, "step": 12840 }, { "epoch": 0.4535672814132254, "grad_norm": 1.5468995571136475, "learning_rate": 5.988456787821778e-06, "loss": 0.8057, "step": 12841 }, { "epoch": 0.4536026032169333, "grad_norm": 1.7966846227645874, "learning_rate": 5.987896060483144e-06, "loss": 0.809, "step": 12842 }, { "epoch": 0.45363792502064115, "grad_norm": 1.6749097108840942, "learning_rate": 5.9873353202151104e-06, "loss": 0.7961, "step": 12843 }, { "epoch": 0.45367324682434906, "grad_norm": 1.7037913799285889, "learning_rate": 5.98677456702502e-06, "loss": 0.7829, "step": 12844 }, { "epoch": 0.453708568628057, "grad_norm": 1.7466236352920532, "learning_rate": 5.986213800920209e-06, "loss": 0.7872, "step": 12845 }, { "epoch": 0.4537438904317649, "grad_norm": 1.8005831241607666, "learning_rate": 5.985653021908017e-06, "loss": 0.8145, "step": 12846 }, { "epoch": 0.4537792122354728, "grad_norm": 3.486943244934082, "learning_rate": 5.985092229995784e-06, "loss": 0.835, "step": 12847 }, { "epoch": 0.4538145340391807, "grad_norm": 2.353519916534424, "learning_rate": 5.984531425190848e-06, "loss": 0.8136, "step": 12848 }, { "epoch": 0.4538498558428886, "grad_norm": 1.617908000946045, "learning_rate": 5.983970607500551e-06, "loss": 0.7784, "step": 12849 }, { "epoch": 0.4538851776465965, "grad_norm": 0.9877070188522339, "learning_rate": 5.983409776932231e-06, "loss": 0.586, "step": 12850 }, { "epoch": 0.45392049945030444, "grad_norm": 1.9408270120620728, "learning_rate": 5.982848933493229e-06, "loss": 0.802, "step": 12851 }, { "epoch": 0.45395582125401235, "grad_norm": 1.5815647840499878, "learning_rate": 5.982288077190885e-06, "loss": 0.7846, "step": 12852 }, { "epoch": 0.45399114305772026, "grad_norm": 0.8958619236946106, "learning_rate": 5.981727208032538e-06, "loss": 0.6036, "step": 12853 }, { "epoch": 0.45402646486142817, "grad_norm": 1.916659951210022, "learning_rate": 5.981166326025532e-06, "loss": 0.7922, "step": 12854 }, { "epoch": 0.4540617866651361, "grad_norm": 1.6022239923477173, "learning_rate": 5.980605431177204e-06, "loss": 0.7998, "step": 12855 }, { "epoch": 0.45409710846884394, "grad_norm": 1.695959448814392, "learning_rate": 5.980044523494895e-06, "loss": 0.8136, "step": 12856 }, { "epoch": 0.45413243027255185, "grad_norm": 0.788406491279602, "learning_rate": 5.97948360298595e-06, "loss": 0.5563, "step": 12857 }, { "epoch": 0.45416775207625976, "grad_norm": 1.633571743965149, "learning_rate": 5.978922669657709e-06, "loss": 0.7883, "step": 12858 }, { "epoch": 0.45420307387996767, "grad_norm": 1.5613332986831665, "learning_rate": 5.9783617235175096e-06, "loss": 0.7621, "step": 12859 }, { "epoch": 0.4542383956836756, "grad_norm": 1.7211896181106567, "learning_rate": 5.977800764572696e-06, "loss": 0.8206, "step": 12860 }, { "epoch": 0.4542737174873835, "grad_norm": 1.8714321851730347, "learning_rate": 5.97723979283061e-06, "loss": 0.8022, "step": 12861 }, { "epoch": 0.4543090392910914, "grad_norm": 1.7686740159988403, "learning_rate": 5.976678808298594e-06, "loss": 0.8264, "step": 12862 }, { "epoch": 0.4543443610947993, "grad_norm": 1.7137736082077026, "learning_rate": 5.976117810983989e-06, "loss": 0.7894, "step": 12863 }, { "epoch": 0.4543796828985072, "grad_norm": 1.5256736278533936, "learning_rate": 5.9755568008941365e-06, "loss": 0.8049, "step": 12864 }, { "epoch": 0.45441500470221513, "grad_norm": 1.7986960411071777, "learning_rate": 5.974995778036382e-06, "loss": 0.8103, "step": 12865 }, { "epoch": 0.45445032650592304, "grad_norm": 1.606521725654602, "learning_rate": 5.974434742418065e-06, "loss": 0.7839, "step": 12866 }, { "epoch": 0.45448564830963095, "grad_norm": 1.5697743892669678, "learning_rate": 5.973873694046528e-06, "loss": 0.7831, "step": 12867 }, { "epoch": 0.45452097011333886, "grad_norm": 1.8906546831130981, "learning_rate": 5.973312632929118e-06, "loss": 0.75, "step": 12868 }, { "epoch": 0.4545562919170467, "grad_norm": 1.8649669885635376, "learning_rate": 5.9727515590731734e-06, "loss": 0.8406, "step": 12869 }, { "epoch": 0.45459161372075463, "grad_norm": 1.5956897735595703, "learning_rate": 5.9721904724860406e-06, "loss": 0.8056, "step": 12870 }, { "epoch": 0.45462693552446254, "grad_norm": 1.6465907096862793, "learning_rate": 5.97162937317506e-06, "loss": 0.8149, "step": 12871 }, { "epoch": 0.45466225732817045, "grad_norm": 1.5479316711425781, "learning_rate": 5.971068261147577e-06, "loss": 0.7745, "step": 12872 }, { "epoch": 0.45469757913187836, "grad_norm": 1.6204688549041748, "learning_rate": 5.970507136410936e-06, "loss": 0.8034, "step": 12873 }, { "epoch": 0.45473290093558627, "grad_norm": 1.8674758672714233, "learning_rate": 5.969945998972478e-06, "loss": 0.8057, "step": 12874 }, { "epoch": 0.4547682227392942, "grad_norm": 1.9326460361480713, "learning_rate": 5.969384848839551e-06, "loss": 0.8094, "step": 12875 }, { "epoch": 0.4548035445430021, "grad_norm": 1.69593346118927, "learning_rate": 5.968823686019497e-06, "loss": 0.8429, "step": 12876 }, { "epoch": 0.45483886634671, "grad_norm": 1.982704520225525, "learning_rate": 5.968262510519661e-06, "loss": 0.8052, "step": 12877 }, { "epoch": 0.4548741881504179, "grad_norm": 1.8092436790466309, "learning_rate": 5.967701322347388e-06, "loss": 0.8032, "step": 12878 }, { "epoch": 0.4549095099541258, "grad_norm": 3.6783761978149414, "learning_rate": 5.96714012151002e-06, "loss": 0.7696, "step": 12879 }, { "epoch": 0.45494483175783373, "grad_norm": 2.002931833267212, "learning_rate": 5.966578908014905e-06, "loss": 0.815, "step": 12880 }, { "epoch": 0.45498015356154164, "grad_norm": 1.6280794143676758, "learning_rate": 5.966017681869387e-06, "loss": 0.7883, "step": 12881 }, { "epoch": 0.4550154753652495, "grad_norm": 1.9819719791412354, "learning_rate": 5.965456443080811e-06, "loss": 0.7943, "step": 12882 }, { "epoch": 0.4550507971689574, "grad_norm": 1.82471764087677, "learning_rate": 5.964895191656522e-06, "loss": 0.8528, "step": 12883 }, { "epoch": 0.4550861189726653, "grad_norm": 1.711790919303894, "learning_rate": 5.964333927603867e-06, "loss": 0.8253, "step": 12884 }, { "epoch": 0.45512144077637323, "grad_norm": 1.947547435760498, "learning_rate": 5.96377265093019e-06, "loss": 0.8098, "step": 12885 }, { "epoch": 0.45515676258008114, "grad_norm": 1.5843065977096558, "learning_rate": 5.963211361642838e-06, "loss": 0.7682, "step": 12886 }, { "epoch": 0.45519208438378905, "grad_norm": 1.6460845470428467, "learning_rate": 5.962650059749157e-06, "loss": 0.7879, "step": 12887 }, { "epoch": 0.45522740618749696, "grad_norm": 1.5661907196044922, "learning_rate": 5.962088745256494e-06, "loss": 0.7727, "step": 12888 }, { "epoch": 0.4552627279912049, "grad_norm": 1.6581865549087524, "learning_rate": 5.961527418172193e-06, "loss": 0.8373, "step": 12889 }, { "epoch": 0.4552980497949128, "grad_norm": 1.7059698104858398, "learning_rate": 5.960966078503602e-06, "loss": 0.777, "step": 12890 }, { "epoch": 0.4553333715986207, "grad_norm": 1.561049461364746, "learning_rate": 5.960404726258067e-06, "loss": 0.7663, "step": 12891 }, { "epoch": 0.4553686934023286, "grad_norm": 1.8101577758789062, "learning_rate": 5.959843361442937e-06, "loss": 0.7883, "step": 12892 }, { "epoch": 0.4554040152060365, "grad_norm": 1.8237215280532837, "learning_rate": 5.959281984065556e-06, "loss": 0.797, "step": 12893 }, { "epoch": 0.4554393370097444, "grad_norm": 1.6376214027404785, "learning_rate": 5.958720594133274e-06, "loss": 0.8172, "step": 12894 }, { "epoch": 0.4554746588134523, "grad_norm": 1.0519566535949707, "learning_rate": 5.958159191653437e-06, "loss": 0.5933, "step": 12895 }, { "epoch": 0.4555099806171602, "grad_norm": 1.6789453029632568, "learning_rate": 5.9575977766333925e-06, "loss": 0.8284, "step": 12896 }, { "epoch": 0.4555453024208681, "grad_norm": 1.863016128540039, "learning_rate": 5.957036349080488e-06, "loss": 0.7741, "step": 12897 }, { "epoch": 0.455580624224576, "grad_norm": 1.5468335151672363, "learning_rate": 5.956474909002071e-06, "loss": 0.8267, "step": 12898 }, { "epoch": 0.4556159460282839, "grad_norm": 1.8462294340133667, "learning_rate": 5.955913456405491e-06, "loss": 0.7806, "step": 12899 }, { "epoch": 0.45565126783199184, "grad_norm": 1.7125582695007324, "learning_rate": 5.955351991298096e-06, "loss": 0.7894, "step": 12900 }, { "epoch": 0.45568658963569975, "grad_norm": 1.7988868951797485, "learning_rate": 5.954790513687232e-06, "loss": 0.8053, "step": 12901 }, { "epoch": 0.45572191143940766, "grad_norm": 1.6694600582122803, "learning_rate": 5.95422902358025e-06, "loss": 0.7947, "step": 12902 }, { "epoch": 0.45575723324311557, "grad_norm": 1.9857481718063354, "learning_rate": 5.953667520984497e-06, "loss": 0.8299, "step": 12903 }, { "epoch": 0.4557925550468235, "grad_norm": 1.5287048816680908, "learning_rate": 5.953106005907325e-06, "loss": 0.789, "step": 12904 }, { "epoch": 0.4558278768505314, "grad_norm": 1.742150902748108, "learning_rate": 5.952544478356082e-06, "loss": 0.8029, "step": 12905 }, { "epoch": 0.4558631986542393, "grad_norm": 1.7229673862457275, "learning_rate": 5.951982938338112e-06, "loss": 0.8519, "step": 12906 }, { "epoch": 0.4558985204579472, "grad_norm": 1.6983059644699097, "learning_rate": 5.9514213858607696e-06, "loss": 0.8001, "step": 12907 }, { "epoch": 0.45593384226165506, "grad_norm": 1.756809115409851, "learning_rate": 5.950859820931403e-06, "loss": 0.7768, "step": 12908 }, { "epoch": 0.455969164065363, "grad_norm": 1.6141566038131714, "learning_rate": 5.950298243557362e-06, "loss": 0.8182, "step": 12909 }, { "epoch": 0.4560044858690709, "grad_norm": 1.614365577697754, "learning_rate": 5.949736653745997e-06, "loss": 0.8056, "step": 12910 }, { "epoch": 0.4560398076727788, "grad_norm": 1.6431081295013428, "learning_rate": 5.949175051504655e-06, "loss": 0.7857, "step": 12911 }, { "epoch": 0.4560751294764867, "grad_norm": 1.757624626159668, "learning_rate": 5.94861343684069e-06, "loss": 0.8012, "step": 12912 }, { "epoch": 0.4561104512801946, "grad_norm": 1.5063681602478027, "learning_rate": 5.94805180976145e-06, "loss": 0.7685, "step": 12913 }, { "epoch": 0.45614577308390253, "grad_norm": 1.5436280965805054, "learning_rate": 5.947490170274287e-06, "loss": 0.7926, "step": 12914 }, { "epoch": 0.45618109488761044, "grad_norm": 1.6540679931640625, "learning_rate": 5.946928518386551e-06, "loss": 0.8364, "step": 12915 }, { "epoch": 0.45621641669131835, "grad_norm": 1.6651583909988403, "learning_rate": 5.946366854105591e-06, "loss": 0.7693, "step": 12916 }, { "epoch": 0.45625173849502626, "grad_norm": 2.058034896850586, "learning_rate": 5.94580517743876e-06, "loss": 0.8408, "step": 12917 }, { "epoch": 0.45628706029873417, "grad_norm": 1.6710742712020874, "learning_rate": 5.94524348839341e-06, "loss": 0.7844, "step": 12918 }, { "epoch": 0.4563223821024421, "grad_norm": 1.6092326641082764, "learning_rate": 5.944681786976889e-06, "loss": 0.7786, "step": 12919 }, { "epoch": 0.45635770390615, "grad_norm": 1.625832438468933, "learning_rate": 5.944120073196552e-06, "loss": 0.8074, "step": 12920 }, { "epoch": 0.45639302570985785, "grad_norm": 2.6821255683898926, "learning_rate": 5.943558347059748e-06, "loss": 0.8009, "step": 12921 }, { "epoch": 0.45642834751356576, "grad_norm": 1.6901397705078125, "learning_rate": 5.9429966085738285e-06, "loss": 0.758, "step": 12922 }, { "epoch": 0.45646366931727367, "grad_norm": 1.8767591714859009, "learning_rate": 5.9424348577461485e-06, "loss": 0.7758, "step": 12923 }, { "epoch": 0.4564989911209816, "grad_norm": 2.0763802528381348, "learning_rate": 5.941873094584059e-06, "loss": 0.7833, "step": 12924 }, { "epoch": 0.4565343129246895, "grad_norm": 1.7488949298858643, "learning_rate": 5.94131131909491e-06, "loss": 0.8324, "step": 12925 }, { "epoch": 0.4565696347283974, "grad_norm": 0.9517992734909058, "learning_rate": 5.9407495312860565e-06, "loss": 0.5964, "step": 12926 }, { "epoch": 0.4566049565321053, "grad_norm": 1.86940598487854, "learning_rate": 5.940187731164848e-06, "loss": 0.8249, "step": 12927 }, { "epoch": 0.4566402783358132, "grad_norm": 1.9159724712371826, "learning_rate": 5.939625918738641e-06, "loss": 0.8143, "step": 12928 }, { "epoch": 0.45667560013952113, "grad_norm": 2.061603307723999, "learning_rate": 5.939064094014786e-06, "loss": 0.828, "step": 12929 }, { "epoch": 0.45671092194322904, "grad_norm": 2.0103445053100586, "learning_rate": 5.938502257000635e-06, "loss": 0.8036, "step": 12930 }, { "epoch": 0.45674624374693695, "grad_norm": 2.011942148208618, "learning_rate": 5.9379404077035465e-06, "loss": 0.8294, "step": 12931 }, { "epoch": 0.45678156555064486, "grad_norm": 1.955161690711975, "learning_rate": 5.937378546130868e-06, "loss": 0.8056, "step": 12932 }, { "epoch": 0.4568168873543528, "grad_norm": 2.075131893157959, "learning_rate": 5.9368166722899554e-06, "loss": 0.8181, "step": 12933 }, { "epoch": 0.45685220915806063, "grad_norm": 1.8313101530075073, "learning_rate": 5.936254786188164e-06, "loss": 0.8011, "step": 12934 }, { "epoch": 0.45688753096176854, "grad_norm": 1.8746298551559448, "learning_rate": 5.935692887832843e-06, "loss": 0.775, "step": 12935 }, { "epoch": 0.45692285276547645, "grad_norm": 1.8576858043670654, "learning_rate": 5.935130977231351e-06, "loss": 0.7963, "step": 12936 }, { "epoch": 0.45695817456918436, "grad_norm": 2.08931827545166, "learning_rate": 5.934569054391041e-06, "loss": 0.8076, "step": 12937 }, { "epoch": 0.45699349637289227, "grad_norm": 2.1502583026885986, "learning_rate": 5.934007119319265e-06, "loss": 0.8064, "step": 12938 }, { "epoch": 0.4570288181766002, "grad_norm": 1.9703487157821655, "learning_rate": 5.933445172023381e-06, "loss": 0.7957, "step": 12939 }, { "epoch": 0.4570641399803081, "grad_norm": 2.0561861991882324, "learning_rate": 5.9328832125107415e-06, "loss": 0.8338, "step": 12940 }, { "epoch": 0.457099461784016, "grad_norm": 2.592690944671631, "learning_rate": 5.932321240788702e-06, "loss": 0.7767, "step": 12941 }, { "epoch": 0.4571347835877239, "grad_norm": 2.9240458011627197, "learning_rate": 5.931759256864618e-06, "loss": 0.8131, "step": 12942 }, { "epoch": 0.4571701053914318, "grad_norm": 1.8689119815826416, "learning_rate": 5.931197260745844e-06, "loss": 0.8105, "step": 12943 }, { "epoch": 0.45720542719513974, "grad_norm": 2.2398579120635986, "learning_rate": 5.930635252439735e-06, "loss": 0.781, "step": 12944 }, { "epoch": 0.45724074899884765, "grad_norm": 1.9850854873657227, "learning_rate": 5.930073231953646e-06, "loss": 0.8425, "step": 12945 }, { "epoch": 0.45727607080255556, "grad_norm": 0.9982527494430542, "learning_rate": 5.929511199294934e-06, "loss": 0.6219, "step": 12946 }, { "epoch": 0.4573113926062634, "grad_norm": 1.9684288501739502, "learning_rate": 5.928949154470954e-06, "loss": 0.7894, "step": 12947 }, { "epoch": 0.4573467144099713, "grad_norm": 2.027130365371704, "learning_rate": 5.9283870974890615e-06, "loss": 0.7859, "step": 12948 }, { "epoch": 0.45738203621367923, "grad_norm": 2.174091100692749, "learning_rate": 5.927825028356614e-06, "loss": 0.8004, "step": 12949 }, { "epoch": 0.45741735801738714, "grad_norm": 2.0492193698883057, "learning_rate": 5.927262947080967e-06, "loss": 0.7884, "step": 12950 }, { "epoch": 0.45745267982109505, "grad_norm": 3.0416271686553955, "learning_rate": 5.926700853669477e-06, "loss": 0.8114, "step": 12951 }, { "epoch": 0.45748800162480296, "grad_norm": 1.7838802337646484, "learning_rate": 5.926138748129501e-06, "loss": 0.8256, "step": 12952 }, { "epoch": 0.4575233234285109, "grad_norm": 2.119948148727417, "learning_rate": 5.925576630468393e-06, "loss": 0.761, "step": 12953 }, { "epoch": 0.4575586452322188, "grad_norm": 1.7192367315292358, "learning_rate": 5.925014500693513e-06, "loss": 0.8122, "step": 12954 }, { "epoch": 0.4575939670359267, "grad_norm": 1.7302685976028442, "learning_rate": 5.924452358812218e-06, "loss": 0.7925, "step": 12955 }, { "epoch": 0.4576292888396346, "grad_norm": 1.7497183084487915, "learning_rate": 5.923890204831862e-06, "loss": 0.8223, "step": 12956 }, { "epoch": 0.4576646106433425, "grad_norm": 1.964885950088501, "learning_rate": 5.923328038759808e-06, "loss": 0.7953, "step": 12957 }, { "epoch": 0.45769993244705043, "grad_norm": 1.6598626375198364, "learning_rate": 5.922765860603409e-06, "loss": 0.8154, "step": 12958 }, { "epoch": 0.45773525425075834, "grad_norm": 1.7458117008209229, "learning_rate": 5.922203670370021e-06, "loss": 0.793, "step": 12959 }, { "epoch": 0.4577705760544662, "grad_norm": 2.774442672729492, "learning_rate": 5.92164146806701e-06, "loss": 0.8209, "step": 12960 }, { "epoch": 0.4578058978581741, "grad_norm": 1.7171728610992432, "learning_rate": 5.921079253701724e-06, "loss": 0.8021, "step": 12961 }, { "epoch": 0.457841219661882, "grad_norm": 1.8069109916687012, "learning_rate": 5.920517027281528e-06, "loss": 0.8154, "step": 12962 }, { "epoch": 0.4578765414655899, "grad_norm": 1.6938226222991943, "learning_rate": 5.919954788813778e-06, "loss": 0.7815, "step": 12963 }, { "epoch": 0.45791186326929784, "grad_norm": 1.6031419038772583, "learning_rate": 5.9193925383058325e-06, "loss": 0.7754, "step": 12964 }, { "epoch": 0.45794718507300575, "grad_norm": 1.8065338134765625, "learning_rate": 5.918830275765051e-06, "loss": 0.7933, "step": 12965 }, { "epoch": 0.45798250687671366, "grad_norm": 1.5432860851287842, "learning_rate": 5.91826800119879e-06, "loss": 0.7769, "step": 12966 }, { "epoch": 0.45801782868042157, "grad_norm": 1.64234459400177, "learning_rate": 5.917705714614411e-06, "loss": 0.8061, "step": 12967 }, { "epoch": 0.4580531504841295, "grad_norm": 1.7683392763137817, "learning_rate": 5.917143416019272e-06, "loss": 0.7786, "step": 12968 }, { "epoch": 0.4580884722878374, "grad_norm": 1.76399827003479, "learning_rate": 5.916581105420732e-06, "loss": 0.7649, "step": 12969 }, { "epoch": 0.4581237940915453, "grad_norm": 1.6496777534484863, "learning_rate": 5.916018782826153e-06, "loss": 0.8239, "step": 12970 }, { "epoch": 0.4581591158952532, "grad_norm": 1.6361536979675293, "learning_rate": 5.91545644824289e-06, "loss": 0.7745, "step": 12971 }, { "epoch": 0.4581944376989611, "grad_norm": 2.124178409576416, "learning_rate": 5.914894101678306e-06, "loss": 0.7652, "step": 12972 }, { "epoch": 0.458229759502669, "grad_norm": 1.7975366115570068, "learning_rate": 5.9143317431397595e-06, "loss": 0.8058, "step": 12973 }, { "epoch": 0.4582650813063769, "grad_norm": 1.8613275289535522, "learning_rate": 5.91376937263461e-06, "loss": 0.8288, "step": 12974 }, { "epoch": 0.4583004031100848, "grad_norm": 1.7851496934890747, "learning_rate": 5.91320699017022e-06, "loss": 0.8153, "step": 12975 }, { "epoch": 0.4583357249137927, "grad_norm": 1.7504448890686035, "learning_rate": 5.912644595753949e-06, "loss": 0.7971, "step": 12976 }, { "epoch": 0.4583710467175006, "grad_norm": 1.7307186126708984, "learning_rate": 5.912082189393156e-06, "loss": 0.8187, "step": 12977 }, { "epoch": 0.45840636852120853, "grad_norm": 2.715282440185547, "learning_rate": 5.911519771095203e-06, "loss": 0.7864, "step": 12978 }, { "epoch": 0.45844169032491644, "grad_norm": 1.723107099533081, "learning_rate": 5.910957340867451e-06, "loss": 0.8013, "step": 12979 }, { "epoch": 0.45847701212862435, "grad_norm": 1.6671943664550781, "learning_rate": 5.910394898717261e-06, "loss": 0.8095, "step": 12980 }, { "epoch": 0.45851233393233226, "grad_norm": 1.7403777837753296, "learning_rate": 5.9098324446519936e-06, "loss": 0.8006, "step": 12981 }, { "epoch": 0.45854765573604017, "grad_norm": 2.0163204669952393, "learning_rate": 5.909269978679008e-06, "loss": 0.8031, "step": 12982 }, { "epoch": 0.4585829775397481, "grad_norm": 1.7179453372955322, "learning_rate": 5.90870750080567e-06, "loss": 0.7933, "step": 12983 }, { "epoch": 0.458618299343456, "grad_norm": 1.6553215980529785, "learning_rate": 5.90814501103934e-06, "loss": 0.7993, "step": 12984 }, { "epoch": 0.4586536211471639, "grad_norm": 1.5320228338241577, "learning_rate": 5.907582509387376e-06, "loss": 0.7833, "step": 12985 }, { "epoch": 0.45868894295087176, "grad_norm": 1.6822216510772705, "learning_rate": 5.907019995857145e-06, "loss": 0.8056, "step": 12986 }, { "epoch": 0.45872426475457967, "grad_norm": 1.6478691101074219, "learning_rate": 5.906457470456006e-06, "loss": 0.8023, "step": 12987 }, { "epoch": 0.4587595865582876, "grad_norm": 1.8986592292785645, "learning_rate": 5.9058949331913225e-06, "loss": 0.8205, "step": 12988 }, { "epoch": 0.4587949083619955, "grad_norm": 1.4785306453704834, "learning_rate": 5.905332384070457e-06, "loss": 0.7734, "step": 12989 }, { "epoch": 0.4588302301657034, "grad_norm": 1.8010624647140503, "learning_rate": 5.904769823100769e-06, "loss": 0.8208, "step": 12990 }, { "epoch": 0.4588655519694113, "grad_norm": 1.6318061351776123, "learning_rate": 5.904207250289625e-06, "loss": 0.8039, "step": 12991 }, { "epoch": 0.4589008737731192, "grad_norm": 1.7483999729156494, "learning_rate": 5.9036446656443865e-06, "loss": 0.8211, "step": 12992 }, { "epoch": 0.45893619557682713, "grad_norm": 1.6417418718338013, "learning_rate": 5.9030820691724165e-06, "loss": 0.7771, "step": 12993 }, { "epoch": 0.45897151738053504, "grad_norm": 1.642676830291748, "learning_rate": 5.902519460881077e-06, "loss": 0.7954, "step": 12994 }, { "epoch": 0.45900683918424295, "grad_norm": 1.6858447790145874, "learning_rate": 5.901956840777735e-06, "loss": 0.807, "step": 12995 }, { "epoch": 0.45904216098795086, "grad_norm": 1.7247196435928345, "learning_rate": 5.901394208869749e-06, "loss": 0.81, "step": 12996 }, { "epoch": 0.4590774827916588, "grad_norm": 2.1290664672851562, "learning_rate": 5.900831565164487e-06, "loss": 0.8045, "step": 12997 }, { "epoch": 0.4591128045953667, "grad_norm": 1.8572362661361694, "learning_rate": 5.900268909669309e-06, "loss": 0.8015, "step": 12998 }, { "epoch": 0.4591481263990746, "grad_norm": 1.8749946355819702, "learning_rate": 5.899706242391582e-06, "loss": 0.8506, "step": 12999 }, { "epoch": 0.45918344820278245, "grad_norm": 1.6961257457733154, "learning_rate": 5.899143563338668e-06, "loss": 0.8449, "step": 13000 }, { "epoch": 0.45921877000649036, "grad_norm": 1.5737276077270508, "learning_rate": 5.8985808725179325e-06, "loss": 0.7812, "step": 13001 }, { "epoch": 0.4592540918101983, "grad_norm": 1.6009620428085327, "learning_rate": 5.89801816993674e-06, "loss": 0.8244, "step": 13002 }, { "epoch": 0.4592894136139062, "grad_norm": 1.8983449935913086, "learning_rate": 5.897455455602452e-06, "loss": 0.8198, "step": 13003 }, { "epoch": 0.4593247354176141, "grad_norm": 1.7675347328186035, "learning_rate": 5.8968927295224385e-06, "loss": 0.8253, "step": 13004 }, { "epoch": 0.459360057221322, "grad_norm": 1.651397466659546, "learning_rate": 5.89632999170406e-06, "loss": 0.8081, "step": 13005 }, { "epoch": 0.4593953790250299, "grad_norm": 1.7003116607666016, "learning_rate": 5.895767242154684e-06, "loss": 0.8027, "step": 13006 }, { "epoch": 0.4594307008287378, "grad_norm": 1.7334363460540771, "learning_rate": 5.895204480881676e-06, "loss": 0.7957, "step": 13007 }, { "epoch": 0.45946602263244574, "grad_norm": 2.0239903926849365, "learning_rate": 5.894641707892397e-06, "loss": 0.8318, "step": 13008 }, { "epoch": 0.45950134443615365, "grad_norm": 2.1959893703460693, "learning_rate": 5.894078923194218e-06, "loss": 0.8169, "step": 13009 }, { "epoch": 0.45953666623986156, "grad_norm": 1.556397795677185, "learning_rate": 5.893516126794501e-06, "loss": 0.788, "step": 13010 }, { "epoch": 0.45957198804356947, "grad_norm": 1.8083449602127075, "learning_rate": 5.892953318700612e-06, "loss": 0.7979, "step": 13011 }, { "epoch": 0.4596073098472774, "grad_norm": 1.600620150566101, "learning_rate": 5.892390498919919e-06, "loss": 0.8153, "step": 13012 }, { "epoch": 0.45964263165098523, "grad_norm": 1.573356032371521, "learning_rate": 5.891827667459787e-06, "loss": 0.771, "step": 13013 }, { "epoch": 0.45967795345469314, "grad_norm": 1.7050861120224, "learning_rate": 5.891264824327582e-06, "loss": 0.7839, "step": 13014 }, { "epoch": 0.45971327525840106, "grad_norm": 1.6776171922683716, "learning_rate": 5.8907019695306696e-06, "loss": 0.7595, "step": 13015 }, { "epoch": 0.45974859706210897, "grad_norm": 1.7856982946395874, "learning_rate": 5.890139103076419e-06, "loss": 0.8135, "step": 13016 }, { "epoch": 0.4597839188658169, "grad_norm": 1.6079208850860596, "learning_rate": 5.889576224972194e-06, "loss": 0.8115, "step": 13017 }, { "epoch": 0.4598192406695248, "grad_norm": 1.6921133995056152, "learning_rate": 5.889013335225363e-06, "loss": 0.8163, "step": 13018 }, { "epoch": 0.4598545624732327, "grad_norm": 1.4754077196121216, "learning_rate": 5.888450433843291e-06, "loss": 0.7799, "step": 13019 }, { "epoch": 0.4598898842769406, "grad_norm": 1.6489942073822021, "learning_rate": 5.887887520833349e-06, "loss": 0.7665, "step": 13020 }, { "epoch": 0.4599252060806485, "grad_norm": 1.7794290781021118, "learning_rate": 5.887324596202901e-06, "loss": 0.802, "step": 13021 }, { "epoch": 0.45996052788435643, "grad_norm": 1.7194092273712158, "learning_rate": 5.886761659959315e-06, "loss": 0.7631, "step": 13022 }, { "epoch": 0.45999584968806434, "grad_norm": 1.6294918060302734, "learning_rate": 5.886198712109961e-06, "loss": 0.8169, "step": 13023 }, { "epoch": 0.46003117149177225, "grad_norm": 1.6205544471740723, "learning_rate": 5.885635752662203e-06, "loss": 0.8073, "step": 13024 }, { "epoch": 0.46006649329548016, "grad_norm": 1.693846583366394, "learning_rate": 5.885072781623411e-06, "loss": 0.7772, "step": 13025 }, { "epoch": 0.460101815099188, "grad_norm": 1.5491174459457397, "learning_rate": 5.8845097990009536e-06, "loss": 0.816, "step": 13026 }, { "epoch": 0.4601371369028959, "grad_norm": 1.7676447629928589, "learning_rate": 5.8839468048021974e-06, "loss": 0.7659, "step": 13027 }, { "epoch": 0.46017245870660384, "grad_norm": 1.6870191097259521, "learning_rate": 5.883383799034512e-06, "loss": 0.7993, "step": 13028 }, { "epoch": 0.46020778051031175, "grad_norm": 1.476081132888794, "learning_rate": 5.882820781705265e-06, "loss": 0.7931, "step": 13029 }, { "epoch": 0.46024310231401966, "grad_norm": 1.680846095085144, "learning_rate": 5.882257752821825e-06, "loss": 0.7919, "step": 13030 }, { "epoch": 0.46027842411772757, "grad_norm": 1.7802773714065552, "learning_rate": 5.881694712391563e-06, "loss": 0.7823, "step": 13031 }, { "epoch": 0.4603137459214355, "grad_norm": 1.642352819442749, "learning_rate": 5.881131660421845e-06, "loss": 0.813, "step": 13032 }, { "epoch": 0.4603490677251434, "grad_norm": 1.7635316848754883, "learning_rate": 5.880568596920042e-06, "loss": 0.8096, "step": 13033 }, { "epoch": 0.4603843895288513, "grad_norm": 1.6479486227035522, "learning_rate": 5.8800055218935245e-06, "loss": 0.8084, "step": 13034 }, { "epoch": 0.4604197113325592, "grad_norm": 1.6345775127410889, "learning_rate": 5.8794424353496585e-06, "loss": 0.7819, "step": 13035 }, { "epoch": 0.4604550331362671, "grad_norm": 1.7117727994918823, "learning_rate": 5.878879337295815e-06, "loss": 0.8151, "step": 13036 }, { "epoch": 0.46049035493997503, "grad_norm": 0.9232218265533447, "learning_rate": 5.878316227739364e-06, "loss": 0.6055, "step": 13037 }, { "epoch": 0.46052567674368294, "grad_norm": 1.5831270217895508, "learning_rate": 5.8777531066876745e-06, "loss": 0.8171, "step": 13038 }, { "epoch": 0.4605609985473908, "grad_norm": 1.7353733777999878, "learning_rate": 5.877189974148119e-06, "loss": 0.7877, "step": 13039 }, { "epoch": 0.4605963203510987, "grad_norm": 1.665543556213379, "learning_rate": 5.8766268301280645e-06, "loss": 0.8156, "step": 13040 }, { "epoch": 0.4606316421548066, "grad_norm": 1.512191653251648, "learning_rate": 5.876063674634884e-06, "loss": 0.7797, "step": 13041 }, { "epoch": 0.46066696395851453, "grad_norm": 1.935804843902588, "learning_rate": 5.875500507675946e-06, "loss": 0.7909, "step": 13042 }, { "epoch": 0.46070228576222244, "grad_norm": 1.6306157112121582, "learning_rate": 5.874937329258622e-06, "loss": 0.7735, "step": 13043 }, { "epoch": 0.46073760756593035, "grad_norm": 1.801589012145996, "learning_rate": 5.874374139390285e-06, "loss": 0.8114, "step": 13044 }, { "epoch": 0.46077292936963826, "grad_norm": 1.6506879329681396, "learning_rate": 5.873810938078301e-06, "loss": 0.7757, "step": 13045 }, { "epoch": 0.4608082511733462, "grad_norm": 1.8983755111694336, "learning_rate": 5.873247725330045e-06, "loss": 0.817, "step": 13046 }, { "epoch": 0.4608435729770541, "grad_norm": 1.590043544769287, "learning_rate": 5.872684501152886e-06, "loss": 0.8022, "step": 13047 }, { "epoch": 0.460878894780762, "grad_norm": 1.5567820072174072, "learning_rate": 5.872121265554196e-06, "loss": 0.7553, "step": 13048 }, { "epoch": 0.4609142165844699, "grad_norm": 1.6938146352767944, "learning_rate": 5.871558018541348e-06, "loss": 0.7931, "step": 13049 }, { "epoch": 0.4609495383881778, "grad_norm": 1.7010568380355835, "learning_rate": 5.870994760121712e-06, "loss": 0.8075, "step": 13050 }, { "epoch": 0.4609848601918857, "grad_norm": 1.5752220153808594, "learning_rate": 5.8704314903026595e-06, "loss": 0.7993, "step": 13051 }, { "epoch": 0.4610201819955936, "grad_norm": 1.7820746898651123, "learning_rate": 5.869868209091565e-06, "loss": 0.7771, "step": 13052 }, { "epoch": 0.4610555037993015, "grad_norm": 1.4879390001296997, "learning_rate": 5.8693049164957975e-06, "loss": 0.7575, "step": 13053 }, { "epoch": 0.4610908256030094, "grad_norm": 1.5583595037460327, "learning_rate": 5.868741612522731e-06, "loss": 0.7809, "step": 13054 }, { "epoch": 0.4611261474067173, "grad_norm": 1.6291835308074951, "learning_rate": 5.868178297179738e-06, "loss": 0.7784, "step": 13055 }, { "epoch": 0.4611614692104252, "grad_norm": 1.6437236070632935, "learning_rate": 5.8676149704741905e-06, "loss": 0.8159, "step": 13056 }, { "epoch": 0.46119679101413313, "grad_norm": 1.599335789680481, "learning_rate": 5.8670516324134605e-06, "loss": 0.7587, "step": 13057 }, { "epoch": 0.46123211281784104, "grad_norm": 1.566920280456543, "learning_rate": 5.866488283004923e-06, "loss": 0.7828, "step": 13058 }, { "epoch": 0.46126743462154896, "grad_norm": 1.5576971769332886, "learning_rate": 5.865924922255949e-06, "loss": 0.8068, "step": 13059 }, { "epoch": 0.46130275642525687, "grad_norm": 1.7222234010696411, "learning_rate": 5.865361550173912e-06, "loss": 0.8134, "step": 13060 }, { "epoch": 0.4613380782289648, "grad_norm": 1.5822031497955322, "learning_rate": 5.864798166766186e-06, "loss": 0.8018, "step": 13061 }, { "epoch": 0.4613734000326727, "grad_norm": 1.6825650930404663, "learning_rate": 5.864234772040146e-06, "loss": 0.8407, "step": 13062 }, { "epoch": 0.4614087218363806, "grad_norm": 1.5596342086791992, "learning_rate": 5.863671366003162e-06, "loss": 0.7862, "step": 13063 }, { "epoch": 0.4614440436400885, "grad_norm": 1.6724737882614136, "learning_rate": 5.86310794866261e-06, "loss": 0.7898, "step": 13064 }, { "epoch": 0.46147936544379636, "grad_norm": 1.5930757522583008, "learning_rate": 5.862544520025862e-06, "loss": 0.8037, "step": 13065 }, { "epoch": 0.4615146872475043, "grad_norm": 1.5138611793518066, "learning_rate": 5.861981080100294e-06, "loss": 0.7467, "step": 13066 }, { "epoch": 0.4615500090512122, "grad_norm": 1.6035226583480835, "learning_rate": 5.861417628893281e-06, "loss": 0.7957, "step": 13067 }, { "epoch": 0.4615853308549201, "grad_norm": 1.6739213466644287, "learning_rate": 5.860854166412195e-06, "loss": 0.7683, "step": 13068 }, { "epoch": 0.461620652658628, "grad_norm": 1.5902961492538452, "learning_rate": 5.860290692664411e-06, "loss": 0.7893, "step": 13069 }, { "epoch": 0.4616559744623359, "grad_norm": 1.670838713645935, "learning_rate": 5.859727207657305e-06, "loss": 0.811, "step": 13070 }, { "epoch": 0.4616912962660438, "grad_norm": 1.8158562183380127, "learning_rate": 5.859163711398251e-06, "loss": 0.7921, "step": 13071 }, { "epoch": 0.46172661806975174, "grad_norm": 1.5117868185043335, "learning_rate": 5.8586002038946245e-06, "loss": 0.7588, "step": 13072 }, { "epoch": 0.46176193987345965, "grad_norm": 1.6112686395645142, "learning_rate": 5.8580366851538e-06, "loss": 0.8003, "step": 13073 }, { "epoch": 0.46179726167716756, "grad_norm": 1.6221392154693604, "learning_rate": 5.85747315518315e-06, "loss": 0.7936, "step": 13074 }, { "epoch": 0.46183258348087547, "grad_norm": 1.7732341289520264, "learning_rate": 5.856909613990055e-06, "loss": 0.8103, "step": 13075 }, { "epoch": 0.4618679052845834, "grad_norm": 1.8025147914886475, "learning_rate": 5.8563460615818865e-06, "loss": 0.8265, "step": 13076 }, { "epoch": 0.4619032270882913, "grad_norm": 1.6779249906539917, "learning_rate": 5.855782497966023e-06, "loss": 0.7935, "step": 13077 }, { "epoch": 0.46193854889199915, "grad_norm": 1.6439495086669922, "learning_rate": 5.855218923149838e-06, "loss": 0.8145, "step": 13078 }, { "epoch": 0.46197387069570706, "grad_norm": 1.9018433094024658, "learning_rate": 5.85465533714071e-06, "loss": 0.7998, "step": 13079 }, { "epoch": 0.46200919249941497, "grad_norm": 1.7481187582015991, "learning_rate": 5.8540917399460115e-06, "loss": 0.8, "step": 13080 }, { "epoch": 0.4620445143031229, "grad_norm": 1.6967490911483765, "learning_rate": 5.853528131573123e-06, "loss": 0.8019, "step": 13081 }, { "epoch": 0.4620798361068308, "grad_norm": 1.6943762302398682, "learning_rate": 5.852964512029418e-06, "loss": 0.8556, "step": 13082 }, { "epoch": 0.4621151579105387, "grad_norm": 1.5937365293502808, "learning_rate": 5.852400881322273e-06, "loss": 0.7906, "step": 13083 }, { "epoch": 0.4621504797142466, "grad_norm": 1.7102885246276855, "learning_rate": 5.851837239459066e-06, "loss": 0.7985, "step": 13084 }, { "epoch": 0.4621858015179545, "grad_norm": 1.594788670539856, "learning_rate": 5.851273586447172e-06, "loss": 0.7827, "step": 13085 }, { "epoch": 0.46222112332166243, "grad_norm": 1.7351996898651123, "learning_rate": 5.850709922293972e-06, "loss": 0.8051, "step": 13086 }, { "epoch": 0.46225644512537034, "grad_norm": 0.9300709366798401, "learning_rate": 5.85014624700684e-06, "loss": 0.601, "step": 13087 }, { "epoch": 0.46229176692907825, "grad_norm": 1.6770328283309937, "learning_rate": 5.849582560593152e-06, "loss": 0.8059, "step": 13088 }, { "epoch": 0.46232708873278616, "grad_norm": 1.7141879796981812, "learning_rate": 5.849018863060288e-06, "loss": 0.8073, "step": 13089 }, { "epoch": 0.4623624105364941, "grad_norm": 1.8906813859939575, "learning_rate": 5.848455154415625e-06, "loss": 0.8013, "step": 13090 }, { "epoch": 0.46239773234020193, "grad_norm": 1.8209887742996216, "learning_rate": 5.847891434666541e-06, "loss": 0.7828, "step": 13091 }, { "epoch": 0.46243305414390984, "grad_norm": 1.6721917390823364, "learning_rate": 5.8473277038204125e-06, "loss": 0.812, "step": 13092 }, { "epoch": 0.46246837594761775, "grad_norm": 1.721433401107788, "learning_rate": 5.846763961884617e-06, "loss": 0.7939, "step": 13093 }, { "epoch": 0.46250369775132566, "grad_norm": 1.610836148262024, "learning_rate": 5.846200208866536e-06, "loss": 0.8153, "step": 13094 }, { "epoch": 0.46253901955503357, "grad_norm": 2.268109083175659, "learning_rate": 5.8456364447735436e-06, "loss": 0.8074, "step": 13095 }, { "epoch": 0.4625743413587415, "grad_norm": 1.7475966215133667, "learning_rate": 5.845072669613022e-06, "loss": 0.8387, "step": 13096 }, { "epoch": 0.4626096631624494, "grad_norm": 1.5759645700454712, "learning_rate": 5.844508883392349e-06, "loss": 0.8105, "step": 13097 }, { "epoch": 0.4626449849661573, "grad_norm": 1.8727824687957764, "learning_rate": 5.843945086118901e-06, "loss": 0.7796, "step": 13098 }, { "epoch": 0.4626803067698652, "grad_norm": 2.2845053672790527, "learning_rate": 5.8433812778000594e-06, "loss": 0.8034, "step": 13099 }, { "epoch": 0.4627156285735731, "grad_norm": 1.6445704698562622, "learning_rate": 5.8428174584432005e-06, "loss": 0.8336, "step": 13100 }, { "epoch": 0.46275095037728103, "grad_norm": 1.6641162633895874, "learning_rate": 5.842253628055707e-06, "loss": 0.7844, "step": 13101 }, { "epoch": 0.46278627218098894, "grad_norm": 1.5558910369873047, "learning_rate": 5.841689786644955e-06, "loss": 0.7926, "step": 13102 }, { "epoch": 0.46282159398469686, "grad_norm": 1.844400405883789, "learning_rate": 5.841125934218325e-06, "loss": 0.8115, "step": 13103 }, { "epoch": 0.4628569157884047, "grad_norm": 1.731921672821045, "learning_rate": 5.840562070783199e-06, "loss": 0.8029, "step": 13104 }, { "epoch": 0.4628922375921126, "grad_norm": 1.6589187383651733, "learning_rate": 5.839998196346953e-06, "loss": 0.8205, "step": 13105 }, { "epoch": 0.46292755939582053, "grad_norm": 1.8016921281814575, "learning_rate": 5.839434310916968e-06, "loss": 0.7602, "step": 13106 }, { "epoch": 0.46296288119952844, "grad_norm": 1.6422306299209595, "learning_rate": 5.838870414500626e-06, "loss": 0.7985, "step": 13107 }, { "epoch": 0.46299820300323635, "grad_norm": 1.6192762851715088, "learning_rate": 5.8383065071053045e-06, "loss": 0.8088, "step": 13108 }, { "epoch": 0.46303352480694426, "grad_norm": 1.8903154134750366, "learning_rate": 5.8377425887383865e-06, "loss": 0.799, "step": 13109 }, { "epoch": 0.4630688466106522, "grad_norm": 1.6115798950195312, "learning_rate": 5.83717865940725e-06, "loss": 0.8038, "step": 13110 }, { "epoch": 0.4631041684143601, "grad_norm": 1.7509520053863525, "learning_rate": 5.836614719119276e-06, "loss": 0.8065, "step": 13111 }, { "epoch": 0.463139490218068, "grad_norm": 1.8047808408737183, "learning_rate": 5.8360507678818466e-06, "loss": 0.7809, "step": 13112 }, { "epoch": 0.4631748120217759, "grad_norm": 1.664739966392517, "learning_rate": 5.835486805702342e-06, "loss": 0.7898, "step": 13113 }, { "epoch": 0.4632101338254838, "grad_norm": 1.8709951639175415, "learning_rate": 5.834922832588142e-06, "loss": 0.8274, "step": 13114 }, { "epoch": 0.4632454556291917, "grad_norm": 1.0133273601531982, "learning_rate": 5.83435884854663e-06, "loss": 0.6127, "step": 13115 }, { "epoch": 0.46328077743289964, "grad_norm": 1.8817167282104492, "learning_rate": 5.833794853585186e-06, "loss": 0.7968, "step": 13116 }, { "epoch": 0.4633160992366075, "grad_norm": 1.626448392868042, "learning_rate": 5.8332308477111935e-06, "loss": 0.7905, "step": 13117 }, { "epoch": 0.4633514210403154, "grad_norm": 1.77108633518219, "learning_rate": 5.83266683093203e-06, "loss": 0.8036, "step": 13118 }, { "epoch": 0.4633867428440233, "grad_norm": 1.6953061819076538, "learning_rate": 5.8321028032550795e-06, "loss": 0.8067, "step": 13119 }, { "epoch": 0.4634220646477312, "grad_norm": 1.7823817729949951, "learning_rate": 5.831538764687725e-06, "loss": 0.8006, "step": 13120 }, { "epoch": 0.46345738645143914, "grad_norm": 1.9187734127044678, "learning_rate": 5.8309747152373476e-06, "loss": 0.8101, "step": 13121 }, { "epoch": 0.46349270825514705, "grad_norm": 1.5430430173873901, "learning_rate": 5.830410654911327e-06, "loss": 0.7686, "step": 13122 }, { "epoch": 0.46352803005885496, "grad_norm": 1.6217470169067383, "learning_rate": 5.82984658371705e-06, "loss": 0.7934, "step": 13123 }, { "epoch": 0.46356335186256287, "grad_norm": 2.060389518737793, "learning_rate": 5.829282501661896e-06, "loss": 0.8212, "step": 13124 }, { "epoch": 0.4635986736662708, "grad_norm": 1.7178078889846802, "learning_rate": 5.828718408753249e-06, "loss": 0.8117, "step": 13125 }, { "epoch": 0.4636339954699787, "grad_norm": 1.8128384351730347, "learning_rate": 5.8281543049984915e-06, "loss": 0.7944, "step": 13126 }, { "epoch": 0.4636693172736866, "grad_norm": 1.669968605041504, "learning_rate": 5.8275901904050055e-06, "loss": 0.8109, "step": 13127 }, { "epoch": 0.4637046390773945, "grad_norm": 2.0189640522003174, "learning_rate": 5.8270260649801755e-06, "loss": 0.8409, "step": 13128 }, { "epoch": 0.4637399608811024, "grad_norm": 1.564310908317566, "learning_rate": 5.8264619287313825e-06, "loss": 0.7922, "step": 13129 }, { "epoch": 0.4637752826848103, "grad_norm": 1.6577045917510986, "learning_rate": 5.8258977816660126e-06, "loss": 0.7825, "step": 13130 }, { "epoch": 0.4638106044885182, "grad_norm": 1.8229082822799683, "learning_rate": 5.825333623791447e-06, "loss": 0.7787, "step": 13131 }, { "epoch": 0.4638459262922261, "grad_norm": 1.8395785093307495, "learning_rate": 5.82476945511507e-06, "loss": 0.7823, "step": 13132 }, { "epoch": 0.463881248095934, "grad_norm": 1.923904299736023, "learning_rate": 5.824205275644266e-06, "loss": 0.8214, "step": 13133 }, { "epoch": 0.4639165698996419, "grad_norm": 1.6032016277313232, "learning_rate": 5.823641085386418e-06, "loss": 0.7374, "step": 13134 }, { "epoch": 0.46395189170334983, "grad_norm": 1.8308526277542114, "learning_rate": 5.82307688434891e-06, "loss": 0.8104, "step": 13135 }, { "epoch": 0.46398721350705774, "grad_norm": 1.9057129621505737, "learning_rate": 5.822512672539128e-06, "loss": 0.7855, "step": 13136 }, { "epoch": 0.46402253531076565, "grad_norm": 1.6378159523010254, "learning_rate": 5.821948449964452e-06, "loss": 0.8328, "step": 13137 }, { "epoch": 0.46405785711447356, "grad_norm": 1.8541553020477295, "learning_rate": 5.821384216632271e-06, "loss": 0.8091, "step": 13138 }, { "epoch": 0.46409317891818147, "grad_norm": 1.6704072952270508, "learning_rate": 5.820819972549968e-06, "loss": 0.83, "step": 13139 }, { "epoch": 0.4641285007218894, "grad_norm": 1.7130234241485596, "learning_rate": 5.8202557177249266e-06, "loss": 0.7954, "step": 13140 }, { "epoch": 0.4641638225255973, "grad_norm": 1.6256935596466064, "learning_rate": 5.819691452164533e-06, "loss": 0.827, "step": 13141 }, { "epoch": 0.4641991443293052, "grad_norm": 1.8364050388336182, "learning_rate": 5.819127175876172e-06, "loss": 0.8147, "step": 13142 }, { "epoch": 0.46423446613301306, "grad_norm": 1.7972793579101562, "learning_rate": 5.818562888867228e-06, "loss": 0.8151, "step": 13143 }, { "epoch": 0.46426978793672097, "grad_norm": 1.8479913473129272, "learning_rate": 5.817998591145086e-06, "loss": 0.8186, "step": 13144 }, { "epoch": 0.4643051097404289, "grad_norm": 1.82337486743927, "learning_rate": 5.817434282717134e-06, "loss": 0.8169, "step": 13145 }, { "epoch": 0.4643404315441368, "grad_norm": 1.8241803646087646, "learning_rate": 5.816869963590754e-06, "loss": 0.7773, "step": 13146 }, { "epoch": 0.4643757533478447, "grad_norm": 2.424665927886963, "learning_rate": 5.816305633773336e-06, "loss": 0.8237, "step": 13147 }, { "epoch": 0.4644110751515526, "grad_norm": 1.824631690979004, "learning_rate": 5.81574129327226e-06, "loss": 0.8049, "step": 13148 }, { "epoch": 0.4644463969552605, "grad_norm": 1.5527878999710083, "learning_rate": 5.815176942094918e-06, "loss": 0.782, "step": 13149 }, { "epoch": 0.46448171875896843, "grad_norm": 1.5305061340332031, "learning_rate": 5.814612580248692e-06, "loss": 0.7725, "step": 13150 }, { "epoch": 0.46451704056267634, "grad_norm": 1.660353183746338, "learning_rate": 5.81404820774097e-06, "loss": 0.784, "step": 13151 }, { "epoch": 0.46455236236638425, "grad_norm": 1.6990795135498047, "learning_rate": 5.813483824579138e-06, "loss": 0.8119, "step": 13152 }, { "epoch": 0.46458768417009216, "grad_norm": 1.6331011056900024, "learning_rate": 5.812919430770582e-06, "loss": 0.8279, "step": 13153 }, { "epoch": 0.4646230059738001, "grad_norm": 1.6987626552581787, "learning_rate": 5.812355026322691e-06, "loss": 0.8223, "step": 13154 }, { "epoch": 0.464658327777508, "grad_norm": 1.8186185359954834, "learning_rate": 5.81179061124285e-06, "loss": 0.7943, "step": 13155 }, { "epoch": 0.46469364958121584, "grad_norm": 1.0756056308746338, "learning_rate": 5.811226185538444e-06, "loss": 0.6176, "step": 13156 }, { "epoch": 0.46472897138492375, "grad_norm": 1.7244391441345215, "learning_rate": 5.8106617492168635e-06, "loss": 0.8381, "step": 13157 }, { "epoch": 0.46476429318863166, "grad_norm": 1.7676559686660767, "learning_rate": 5.8100973022854935e-06, "loss": 0.7541, "step": 13158 }, { "epoch": 0.46479961499233957, "grad_norm": 1.6310499906539917, "learning_rate": 5.809532844751724e-06, "loss": 0.8286, "step": 13159 }, { "epoch": 0.4648349367960475, "grad_norm": 1.7874042987823486, "learning_rate": 5.80896837662294e-06, "loss": 0.7959, "step": 13160 }, { "epoch": 0.4648702585997554, "grad_norm": 2.0581812858581543, "learning_rate": 5.80840389790653e-06, "loss": 0.7996, "step": 13161 }, { "epoch": 0.4649055804034633, "grad_norm": 1.7602876424789429, "learning_rate": 5.807839408609881e-06, "loss": 0.8294, "step": 13162 }, { "epoch": 0.4649409022071712, "grad_norm": 2.021747350692749, "learning_rate": 5.807274908740383e-06, "loss": 0.7921, "step": 13163 }, { "epoch": 0.4649762240108791, "grad_norm": 1.6597527265548706, "learning_rate": 5.806710398305422e-06, "loss": 0.7993, "step": 13164 }, { "epoch": 0.46501154581458704, "grad_norm": 1.6003811359405518, "learning_rate": 5.806145877312388e-06, "loss": 0.783, "step": 13165 }, { "epoch": 0.46504686761829495, "grad_norm": 1.8871668577194214, "learning_rate": 5.805581345768666e-06, "loss": 0.8315, "step": 13166 }, { "epoch": 0.46508218942200286, "grad_norm": 1.6198667287826538, "learning_rate": 5.805016803681649e-06, "loss": 0.8086, "step": 13167 }, { "epoch": 0.46511751122571077, "grad_norm": 1.6260011196136475, "learning_rate": 5.804452251058722e-06, "loss": 0.7917, "step": 13168 }, { "epoch": 0.4651528330294186, "grad_norm": 1.7097740173339844, "learning_rate": 5.803887687907275e-06, "loss": 0.8301, "step": 13169 }, { "epoch": 0.46518815483312653, "grad_norm": 1.671320915222168, "learning_rate": 5.803323114234698e-06, "loss": 0.8069, "step": 13170 }, { "epoch": 0.46522347663683444, "grad_norm": 1.640784740447998, "learning_rate": 5.802758530048379e-06, "loss": 0.8146, "step": 13171 }, { "epoch": 0.46525879844054235, "grad_norm": 2.04612135887146, "learning_rate": 5.802193935355706e-06, "loss": 0.8198, "step": 13172 }, { "epoch": 0.46529412024425026, "grad_norm": 1.9769365787506104, "learning_rate": 5.801629330164073e-06, "loss": 0.7857, "step": 13173 }, { "epoch": 0.4653294420479582, "grad_norm": 1.8756531476974487, "learning_rate": 5.801064714480862e-06, "loss": 0.7815, "step": 13174 }, { "epoch": 0.4653647638516661, "grad_norm": 1.7264032363891602, "learning_rate": 5.800500088313469e-06, "loss": 0.7892, "step": 13175 }, { "epoch": 0.465400085655374, "grad_norm": 1.5062230825424194, "learning_rate": 5.799935451669281e-06, "loss": 0.8048, "step": 13176 }, { "epoch": 0.4654354074590819, "grad_norm": 1.699811577796936, "learning_rate": 5.799370804555686e-06, "loss": 0.7921, "step": 13177 }, { "epoch": 0.4654707292627898, "grad_norm": 1.7005033493041992, "learning_rate": 5.798806146980079e-06, "loss": 0.7994, "step": 13178 }, { "epoch": 0.46550605106649773, "grad_norm": 1.7143582105636597, "learning_rate": 5.798241478949846e-06, "loss": 0.7982, "step": 13179 }, { "epoch": 0.46554137287020564, "grad_norm": 1.6175003051757812, "learning_rate": 5.7976768004723785e-06, "loss": 0.8091, "step": 13180 }, { "epoch": 0.46557669467391355, "grad_norm": 1.7976590394973755, "learning_rate": 5.797112111555068e-06, "loss": 0.8169, "step": 13181 }, { "epoch": 0.4656120164776214, "grad_norm": 1.6586847305297852, "learning_rate": 5.796547412205304e-06, "loss": 0.8185, "step": 13182 }, { "epoch": 0.4656473382813293, "grad_norm": 1.5608198642730713, "learning_rate": 5.795982702430476e-06, "loss": 0.7935, "step": 13183 }, { "epoch": 0.4656826600850372, "grad_norm": 1.51520836353302, "learning_rate": 5.795417982237977e-06, "loss": 0.8171, "step": 13184 }, { "epoch": 0.46571798188874514, "grad_norm": 1.739274501800537, "learning_rate": 5.794853251635195e-06, "loss": 0.8272, "step": 13185 }, { "epoch": 0.46575330369245305, "grad_norm": 1.59727942943573, "learning_rate": 5.794288510629526e-06, "loss": 0.8026, "step": 13186 }, { "epoch": 0.46578862549616096, "grad_norm": 1.7133815288543701, "learning_rate": 5.7937237592283554e-06, "loss": 0.7826, "step": 13187 }, { "epoch": 0.46582394729986887, "grad_norm": 1.6371983289718628, "learning_rate": 5.793158997439079e-06, "loss": 0.8059, "step": 13188 }, { "epoch": 0.4658592691035768, "grad_norm": 1.7072489261627197, "learning_rate": 5.792594225269087e-06, "loss": 0.8172, "step": 13189 }, { "epoch": 0.4658945909072847, "grad_norm": 1.7654073238372803, "learning_rate": 5.7920294427257705e-06, "loss": 0.7983, "step": 13190 }, { "epoch": 0.4659299127109926, "grad_norm": 2.830402374267578, "learning_rate": 5.791464649816523e-06, "loss": 0.8241, "step": 13191 }, { "epoch": 0.4659652345147005, "grad_norm": 2.073911666870117, "learning_rate": 5.790899846548732e-06, "loss": 0.7818, "step": 13192 }, { "epoch": 0.4660005563184084, "grad_norm": 1.7365020513534546, "learning_rate": 5.790335032929794e-06, "loss": 0.8596, "step": 13193 }, { "epoch": 0.46603587812211633, "grad_norm": 1.6204187870025635, "learning_rate": 5.7897702089671e-06, "loss": 0.8044, "step": 13194 }, { "epoch": 0.4660711999258242, "grad_norm": 1.668398380279541, "learning_rate": 5.789205374668041e-06, "loss": 0.7857, "step": 13195 }, { "epoch": 0.4661065217295321, "grad_norm": 2.0342249870300293, "learning_rate": 5.788640530040011e-06, "loss": 0.8302, "step": 13196 }, { "epoch": 0.46614184353324, "grad_norm": 1.5453804731369019, "learning_rate": 5.788075675090402e-06, "loss": 0.7806, "step": 13197 }, { "epoch": 0.4661771653369479, "grad_norm": 1.63693368434906, "learning_rate": 5.7875108098266065e-06, "loss": 0.8092, "step": 13198 }, { "epoch": 0.46621248714065583, "grad_norm": 1.6887370347976685, "learning_rate": 5.786945934256018e-06, "loss": 0.7742, "step": 13199 }, { "epoch": 0.46624780894436374, "grad_norm": 1.734156847000122, "learning_rate": 5.7863810483860285e-06, "loss": 0.8404, "step": 13200 }, { "epoch": 0.46628313074807165, "grad_norm": 1.7403664588928223, "learning_rate": 5.785816152224032e-06, "loss": 0.7914, "step": 13201 }, { "epoch": 0.46631845255177956, "grad_norm": 1.7864904403686523, "learning_rate": 5.785251245777422e-06, "loss": 0.7805, "step": 13202 }, { "epoch": 0.46635377435548747, "grad_norm": 1.68317711353302, "learning_rate": 5.784686329053589e-06, "loss": 0.8158, "step": 13203 }, { "epoch": 0.4663890961591954, "grad_norm": 1.795136570930481, "learning_rate": 5.7841214020599316e-06, "loss": 0.8008, "step": 13204 }, { "epoch": 0.4664244179629033, "grad_norm": 1.5580025911331177, "learning_rate": 5.783556464803839e-06, "loss": 0.7849, "step": 13205 }, { "epoch": 0.4664597397666112, "grad_norm": 1.6604636907577515, "learning_rate": 5.782991517292707e-06, "loss": 0.7738, "step": 13206 }, { "epoch": 0.4664950615703191, "grad_norm": 1.7738311290740967, "learning_rate": 5.782426559533929e-06, "loss": 0.7994, "step": 13207 }, { "epoch": 0.46653038337402697, "grad_norm": 1.6181308031082153, "learning_rate": 5.781861591534901e-06, "loss": 0.799, "step": 13208 }, { "epoch": 0.4665657051777349, "grad_norm": 1.5827929973602295, "learning_rate": 5.781296613303012e-06, "loss": 0.7647, "step": 13209 }, { "epoch": 0.4666010269814428, "grad_norm": 1.6654423475265503, "learning_rate": 5.780731624845663e-06, "loss": 0.7972, "step": 13210 }, { "epoch": 0.4666363487851507, "grad_norm": 1.556950569152832, "learning_rate": 5.780166626170245e-06, "loss": 0.7845, "step": 13211 }, { "epoch": 0.4666716705888586, "grad_norm": 1.6224937438964844, "learning_rate": 5.7796016172841515e-06, "loss": 0.7923, "step": 13212 }, { "epoch": 0.4667069923925665, "grad_norm": 1.7293972969055176, "learning_rate": 5.7790365981947795e-06, "loss": 0.8113, "step": 13213 }, { "epoch": 0.46674231419627443, "grad_norm": 1.7421140670776367, "learning_rate": 5.778471568909522e-06, "loss": 0.8248, "step": 13214 }, { "epoch": 0.46677763599998234, "grad_norm": 1.600284457206726, "learning_rate": 5.777906529435775e-06, "loss": 0.7561, "step": 13215 }, { "epoch": 0.46681295780369025, "grad_norm": 1.7067803144454956, "learning_rate": 5.777341479780934e-06, "loss": 0.7829, "step": 13216 }, { "epoch": 0.46684827960739816, "grad_norm": 1.6121412515640259, "learning_rate": 5.776776419952395e-06, "loss": 0.7797, "step": 13217 }, { "epoch": 0.4668836014111061, "grad_norm": 1.6649831533432007, "learning_rate": 5.77621134995755e-06, "loss": 0.7937, "step": 13218 }, { "epoch": 0.466918923214814, "grad_norm": 1.5567607879638672, "learning_rate": 5.775646269803798e-06, "loss": 0.8004, "step": 13219 }, { "epoch": 0.4669542450185219, "grad_norm": 1.7936148643493652, "learning_rate": 5.775081179498534e-06, "loss": 0.8645, "step": 13220 }, { "epoch": 0.46698956682222975, "grad_norm": 1.9623686075210571, "learning_rate": 5.7745160790491516e-06, "loss": 0.7966, "step": 13221 }, { "epoch": 0.46702488862593766, "grad_norm": 1.9013192653656006, "learning_rate": 5.77395096846305e-06, "loss": 0.797, "step": 13222 }, { "epoch": 0.4670602104296456, "grad_norm": 1.8747705221176147, "learning_rate": 5.773385847747623e-06, "loss": 0.7964, "step": 13223 }, { "epoch": 0.4670955322333535, "grad_norm": 1.5391603708267212, "learning_rate": 5.772820716910267e-06, "loss": 0.8009, "step": 13224 }, { "epoch": 0.4671308540370614, "grad_norm": 1.7270456552505493, "learning_rate": 5.772255575958379e-06, "loss": 0.799, "step": 13225 }, { "epoch": 0.4671661758407693, "grad_norm": 1.6731408834457397, "learning_rate": 5.771690424899355e-06, "loss": 0.8229, "step": 13226 }, { "epoch": 0.4672014976444772, "grad_norm": 1.6465998888015747, "learning_rate": 5.771125263740593e-06, "loss": 0.7905, "step": 13227 }, { "epoch": 0.4672368194481851, "grad_norm": 1.740362524986267, "learning_rate": 5.7705600924894876e-06, "loss": 0.7926, "step": 13228 }, { "epoch": 0.46727214125189304, "grad_norm": 1.6132745742797852, "learning_rate": 5.769994911153436e-06, "loss": 0.8027, "step": 13229 }, { "epoch": 0.46730746305560095, "grad_norm": 1.7849512100219727, "learning_rate": 5.769429719739836e-06, "loss": 0.8319, "step": 13230 }, { "epoch": 0.46734278485930886, "grad_norm": 1.9804683923721313, "learning_rate": 5.768864518256085e-06, "loss": 0.8067, "step": 13231 }, { "epoch": 0.46737810666301677, "grad_norm": 2.166731834411621, "learning_rate": 5.768299306709579e-06, "loss": 0.7843, "step": 13232 }, { "epoch": 0.4674134284667247, "grad_norm": 1.7571725845336914, "learning_rate": 5.767734085107717e-06, "loss": 0.8385, "step": 13233 }, { "epoch": 0.46744875027043253, "grad_norm": 1.5701850652694702, "learning_rate": 5.767168853457895e-06, "loss": 0.7568, "step": 13234 }, { "epoch": 0.46748407207414044, "grad_norm": 1.7057108879089355, "learning_rate": 5.766603611767511e-06, "loss": 0.8212, "step": 13235 }, { "epoch": 0.46751939387784835, "grad_norm": 1.8645777702331543, "learning_rate": 5.766038360043964e-06, "loss": 0.7449, "step": 13236 }, { "epoch": 0.46755471568155627, "grad_norm": 1.8857381343841553, "learning_rate": 5.76547309829465e-06, "loss": 0.8184, "step": 13237 }, { "epoch": 0.4675900374852642, "grad_norm": 1.740071415901184, "learning_rate": 5.764907826526969e-06, "loss": 0.8205, "step": 13238 }, { "epoch": 0.4676253592889721, "grad_norm": 1.762346625328064, "learning_rate": 5.764342544748318e-06, "loss": 0.81, "step": 13239 }, { "epoch": 0.46766068109268, "grad_norm": 1.794503092765808, "learning_rate": 5.763777252966094e-06, "loss": 0.8134, "step": 13240 }, { "epoch": 0.4676960028963879, "grad_norm": 1.7281129360198975, "learning_rate": 5.763211951187698e-06, "loss": 0.811, "step": 13241 }, { "epoch": 0.4677313247000958, "grad_norm": 1.819433331489563, "learning_rate": 5.762646639420528e-06, "loss": 0.8203, "step": 13242 }, { "epoch": 0.46776664650380373, "grad_norm": 1.7102266550064087, "learning_rate": 5.76208131767198e-06, "loss": 0.8327, "step": 13243 }, { "epoch": 0.46780196830751164, "grad_norm": 1.7119110822677612, "learning_rate": 5.7615159859494575e-06, "loss": 0.7968, "step": 13244 }, { "epoch": 0.46783729011121955, "grad_norm": 1.5721944570541382, "learning_rate": 5.760950644260355e-06, "loss": 0.807, "step": 13245 }, { "epoch": 0.46787261191492746, "grad_norm": 1.569526195526123, "learning_rate": 5.760385292612074e-06, "loss": 0.778, "step": 13246 }, { "epoch": 0.4679079337186353, "grad_norm": 1.6752241849899292, "learning_rate": 5.759819931012015e-06, "loss": 0.8155, "step": 13247 }, { "epoch": 0.4679432555223432, "grad_norm": 1.640252947807312, "learning_rate": 5.759254559467573e-06, "loss": 0.7878, "step": 13248 }, { "epoch": 0.46797857732605114, "grad_norm": 1.8601757287979126, "learning_rate": 5.758689177986152e-06, "loss": 0.7927, "step": 13249 }, { "epoch": 0.46801389912975905, "grad_norm": 2.2764015197753906, "learning_rate": 5.7581237865751486e-06, "loss": 0.8453, "step": 13250 }, { "epoch": 0.46804922093346696, "grad_norm": 1.592085599899292, "learning_rate": 5.757558385241963e-06, "loss": 0.8031, "step": 13251 }, { "epoch": 0.46808454273717487, "grad_norm": 1.948598861694336, "learning_rate": 5.756992973993998e-06, "loss": 0.7649, "step": 13252 }, { "epoch": 0.4681198645408828, "grad_norm": 1.6462769508361816, "learning_rate": 5.756427552838648e-06, "loss": 0.8281, "step": 13253 }, { "epoch": 0.4681551863445907, "grad_norm": 1.8142657279968262, "learning_rate": 5.755862121783319e-06, "loss": 0.8092, "step": 13254 }, { "epoch": 0.4681905081482986, "grad_norm": 1.6459503173828125, "learning_rate": 5.755296680835408e-06, "loss": 0.759, "step": 13255 }, { "epoch": 0.4682258299520065, "grad_norm": 1.8135557174682617, "learning_rate": 5.754731230002316e-06, "loss": 0.7625, "step": 13256 }, { "epoch": 0.4682611517557144, "grad_norm": 1.6933834552764893, "learning_rate": 5.754165769291443e-06, "loss": 0.8085, "step": 13257 }, { "epoch": 0.46829647355942233, "grad_norm": 1.6155831813812256, "learning_rate": 5.75360029871019e-06, "loss": 0.8255, "step": 13258 }, { "epoch": 0.46833179536313024, "grad_norm": 1.7501391172409058, "learning_rate": 5.753034818265959e-06, "loss": 0.842, "step": 13259 }, { "epoch": 0.4683671171668381, "grad_norm": 1.7504793405532837, "learning_rate": 5.752469327966149e-06, "loss": 0.7602, "step": 13260 }, { "epoch": 0.468402438970546, "grad_norm": 1.8719648122787476, "learning_rate": 5.751903827818161e-06, "loss": 0.7939, "step": 13261 }, { "epoch": 0.4684377607742539, "grad_norm": 1.7846895456314087, "learning_rate": 5.751338317829399e-06, "loss": 0.7789, "step": 13262 }, { "epoch": 0.46847308257796183, "grad_norm": 1.6396359205245972, "learning_rate": 5.750772798007262e-06, "loss": 0.7498, "step": 13263 }, { "epoch": 0.46850840438166974, "grad_norm": 1.6138540506362915, "learning_rate": 5.750207268359151e-06, "loss": 0.7998, "step": 13264 }, { "epoch": 0.46854372618537765, "grad_norm": 1.7645167112350464, "learning_rate": 5.74964172889247e-06, "loss": 0.7869, "step": 13265 }, { "epoch": 0.46857904798908556, "grad_norm": 1.9226809740066528, "learning_rate": 5.749076179614616e-06, "loss": 0.7587, "step": 13266 }, { "epoch": 0.4686143697927935, "grad_norm": 1.831497073173523, "learning_rate": 5.748510620532996e-06, "loss": 0.8165, "step": 13267 }, { "epoch": 0.4686496915965014, "grad_norm": 1.6957104206085205, "learning_rate": 5.747945051655009e-06, "loss": 0.794, "step": 13268 }, { "epoch": 0.4686850134002093, "grad_norm": 1.541285753250122, "learning_rate": 5.747379472988058e-06, "loss": 0.7504, "step": 13269 }, { "epoch": 0.4687203352039172, "grad_norm": 1.5352258682250977, "learning_rate": 5.746813884539545e-06, "loss": 0.763, "step": 13270 }, { "epoch": 0.4687556570076251, "grad_norm": 1.705053448677063, "learning_rate": 5.746248286316873e-06, "loss": 0.8178, "step": 13271 }, { "epoch": 0.468790978811333, "grad_norm": 1.7489131689071655, "learning_rate": 5.745682678327441e-06, "loss": 0.7842, "step": 13272 }, { "epoch": 0.4688263006150409, "grad_norm": 1.7285380363464355, "learning_rate": 5.745117060578656e-06, "loss": 0.7698, "step": 13273 }, { "epoch": 0.4688616224187488, "grad_norm": 2.1211507320404053, "learning_rate": 5.74455143307792e-06, "loss": 0.7687, "step": 13274 }, { "epoch": 0.4688969442224567, "grad_norm": 1.511171579360962, "learning_rate": 5.743985795832634e-06, "loss": 0.762, "step": 13275 }, { "epoch": 0.4689322660261646, "grad_norm": 1.5935813188552856, "learning_rate": 5.7434201488502014e-06, "loss": 0.7837, "step": 13276 }, { "epoch": 0.4689675878298725, "grad_norm": 1.6990948915481567, "learning_rate": 5.742854492138026e-06, "loss": 0.8146, "step": 13277 }, { "epoch": 0.46900290963358043, "grad_norm": 1.7442858219146729, "learning_rate": 5.7422888257035105e-06, "loss": 0.7996, "step": 13278 }, { "epoch": 0.46903823143728834, "grad_norm": 1.915954351425171, "learning_rate": 5.741723149554057e-06, "loss": 0.7925, "step": 13279 }, { "epoch": 0.46907355324099625, "grad_norm": 1.7777023315429688, "learning_rate": 5.741157463697072e-06, "loss": 0.8215, "step": 13280 }, { "epoch": 0.46910887504470417, "grad_norm": 1.8830302953720093, "learning_rate": 5.740591768139958e-06, "loss": 0.8343, "step": 13281 }, { "epoch": 0.4691441968484121, "grad_norm": 1.7787748575210571, "learning_rate": 5.740026062890116e-06, "loss": 0.7827, "step": 13282 }, { "epoch": 0.46917951865212, "grad_norm": 1.8068400621414185, "learning_rate": 5.7394603479549535e-06, "loss": 0.8054, "step": 13283 }, { "epoch": 0.4692148404558279, "grad_norm": 1.8230162858963013, "learning_rate": 5.7388946233418726e-06, "loss": 0.7772, "step": 13284 }, { "epoch": 0.4692501622595358, "grad_norm": 1.5552104711532593, "learning_rate": 5.738328889058279e-06, "loss": 0.7607, "step": 13285 }, { "epoch": 0.46928548406324366, "grad_norm": 1.9663207530975342, "learning_rate": 5.7377631451115745e-06, "loss": 0.8, "step": 13286 }, { "epoch": 0.4693208058669516, "grad_norm": 1.8681719303131104, "learning_rate": 5.737197391509164e-06, "loss": 0.8114, "step": 13287 }, { "epoch": 0.4693561276706595, "grad_norm": 2.2101168632507324, "learning_rate": 5.736631628258454e-06, "loss": 0.7959, "step": 13288 }, { "epoch": 0.4693914494743674, "grad_norm": 1.9192882776260376, "learning_rate": 5.736065855366847e-06, "loss": 0.808, "step": 13289 }, { "epoch": 0.4694267712780753, "grad_norm": 1.8426240682601929, "learning_rate": 5.735500072841747e-06, "loss": 0.8273, "step": 13290 }, { "epoch": 0.4694620930817832, "grad_norm": 1.8325618505477905, "learning_rate": 5.734934280690563e-06, "loss": 0.8358, "step": 13291 }, { "epoch": 0.4694974148854911, "grad_norm": 1.7806463241577148, "learning_rate": 5.734368478920697e-06, "loss": 0.7936, "step": 13292 }, { "epoch": 0.46953273668919904, "grad_norm": 1.6726120710372925, "learning_rate": 5.733802667539553e-06, "loss": 0.7789, "step": 13293 }, { "epoch": 0.46956805849290695, "grad_norm": 1.792961597442627, "learning_rate": 5.7332368465545385e-06, "loss": 0.7712, "step": 13294 }, { "epoch": 0.46960338029661486, "grad_norm": 1.8175128698349, "learning_rate": 5.732671015973057e-06, "loss": 0.7951, "step": 13295 }, { "epoch": 0.46963870210032277, "grad_norm": 1.831591010093689, "learning_rate": 5.732105175802516e-06, "loss": 0.7682, "step": 13296 }, { "epoch": 0.4696740239040307, "grad_norm": 1.6759485006332397, "learning_rate": 5.73153932605032e-06, "loss": 0.7692, "step": 13297 }, { "epoch": 0.4697093457077386, "grad_norm": 1.6471118927001953, "learning_rate": 5.7309734667238735e-06, "loss": 0.8033, "step": 13298 }, { "epoch": 0.46974466751144645, "grad_norm": 1.7591183185577393, "learning_rate": 5.730407597830585e-06, "loss": 0.8076, "step": 13299 }, { "epoch": 0.46977998931515436, "grad_norm": 1.80845308303833, "learning_rate": 5.72984171937786e-06, "loss": 0.8213, "step": 13300 }, { "epoch": 0.46981531111886227, "grad_norm": 1.7124874591827393, "learning_rate": 5.7292758313731015e-06, "loss": 0.7693, "step": 13301 }, { "epoch": 0.4698506329225702, "grad_norm": 1.96462881565094, "learning_rate": 5.72870993382372e-06, "loss": 0.7693, "step": 13302 }, { "epoch": 0.4698859547262781, "grad_norm": 1.6680567264556885, "learning_rate": 5.728144026737118e-06, "loss": 0.8096, "step": 13303 }, { "epoch": 0.469921276529986, "grad_norm": 1.7472901344299316, "learning_rate": 5.727578110120705e-06, "loss": 0.8341, "step": 13304 }, { "epoch": 0.4699565983336939, "grad_norm": 1.806787133216858, "learning_rate": 5.727012183981887e-06, "loss": 0.7793, "step": 13305 }, { "epoch": 0.4699919201374018, "grad_norm": 1.0127904415130615, "learning_rate": 5.726446248328069e-06, "loss": 0.5621, "step": 13306 }, { "epoch": 0.47002724194110973, "grad_norm": 1.8182322978973389, "learning_rate": 5.7258803031666596e-06, "loss": 0.7776, "step": 13307 }, { "epoch": 0.47006256374481764, "grad_norm": 1.7546755075454712, "learning_rate": 5.725314348505065e-06, "loss": 0.7951, "step": 13308 }, { "epoch": 0.47009788554852555, "grad_norm": 1.6150486469268799, "learning_rate": 5.724748384350693e-06, "loss": 0.7838, "step": 13309 }, { "epoch": 0.47013320735223346, "grad_norm": 1.5821901559829712, "learning_rate": 5.72418241071095e-06, "loss": 0.8293, "step": 13310 }, { "epoch": 0.4701685291559414, "grad_norm": 1.7661470174789429, "learning_rate": 5.723616427593243e-06, "loss": 0.8547, "step": 13311 }, { "epoch": 0.4702038509596492, "grad_norm": 1.647132396697998, "learning_rate": 5.7230504350049824e-06, "loss": 0.8167, "step": 13312 }, { "epoch": 0.47023917276335714, "grad_norm": 1.78909432888031, "learning_rate": 5.722484432953572e-06, "loss": 0.8081, "step": 13313 }, { "epoch": 0.47027449456706505, "grad_norm": 2.17995023727417, "learning_rate": 5.721918421446422e-06, "loss": 0.8088, "step": 13314 }, { "epoch": 0.47030981637077296, "grad_norm": 1.6010053157806396, "learning_rate": 5.721352400490938e-06, "loss": 0.7703, "step": 13315 }, { "epoch": 0.47034513817448087, "grad_norm": 1.7453852891921997, "learning_rate": 5.7207863700945295e-06, "loss": 0.7919, "step": 13316 }, { "epoch": 0.4703804599781888, "grad_norm": 1.8686553239822388, "learning_rate": 5.720220330264606e-06, "loss": 0.8003, "step": 13317 }, { "epoch": 0.4704157817818967, "grad_norm": 1.7815779447555542, "learning_rate": 5.719654281008575e-06, "loss": 0.8394, "step": 13318 }, { "epoch": 0.4704511035856046, "grad_norm": 1.6925455331802368, "learning_rate": 5.719088222333841e-06, "loss": 0.8396, "step": 13319 }, { "epoch": 0.4704864253893125, "grad_norm": 1.7072778940200806, "learning_rate": 5.7185221542478165e-06, "loss": 0.8016, "step": 13320 }, { "epoch": 0.4705217471930204, "grad_norm": 1.7500269412994385, "learning_rate": 5.71795607675791e-06, "loss": 0.7927, "step": 13321 }, { "epoch": 0.47055706899672833, "grad_norm": 1.529667615890503, "learning_rate": 5.7173899898715305e-06, "loss": 0.8153, "step": 13322 }, { "epoch": 0.47059239080043624, "grad_norm": 1.7645139694213867, "learning_rate": 5.716823893596084e-06, "loss": 0.8056, "step": 13323 }, { "epoch": 0.47062771260414415, "grad_norm": 1.6107429265975952, "learning_rate": 5.71625778793898e-06, "loss": 0.7914, "step": 13324 }, { "epoch": 0.470663034407852, "grad_norm": 1.5628960132598877, "learning_rate": 5.715691672907631e-06, "loss": 0.8033, "step": 13325 }, { "epoch": 0.4706983562115599, "grad_norm": 1.7014590501785278, "learning_rate": 5.7151255485094425e-06, "loss": 0.8056, "step": 13326 }, { "epoch": 0.47073367801526783, "grad_norm": 1.6256893873214722, "learning_rate": 5.714559414751825e-06, "loss": 0.7939, "step": 13327 }, { "epoch": 0.47076899981897574, "grad_norm": 1.7062913179397583, "learning_rate": 5.713993271642189e-06, "loss": 0.7886, "step": 13328 }, { "epoch": 0.47080432162268365, "grad_norm": 1.6955057382583618, "learning_rate": 5.713427119187943e-06, "loss": 0.7659, "step": 13329 }, { "epoch": 0.47083964342639156, "grad_norm": 1.5865693092346191, "learning_rate": 5.7128609573964975e-06, "loss": 0.7633, "step": 13330 }, { "epoch": 0.4708749652300995, "grad_norm": 1.5979825258255005, "learning_rate": 5.712294786275262e-06, "loss": 0.8007, "step": 13331 }, { "epoch": 0.4709102870338074, "grad_norm": 1.8784533739089966, "learning_rate": 5.711728605831644e-06, "loss": 0.8146, "step": 13332 }, { "epoch": 0.4709456088375153, "grad_norm": 1.9071444272994995, "learning_rate": 5.711162416073058e-06, "loss": 0.8352, "step": 13333 }, { "epoch": 0.4709809306412232, "grad_norm": 1.790725827217102, "learning_rate": 5.710596217006912e-06, "loss": 0.8327, "step": 13334 }, { "epoch": 0.4710162524449311, "grad_norm": 1.630590558052063, "learning_rate": 5.710030008640614e-06, "loss": 0.8273, "step": 13335 }, { "epoch": 0.471051574248639, "grad_norm": 1.8188146352767944, "learning_rate": 5.70946379098158e-06, "loss": 0.8221, "step": 13336 }, { "epoch": 0.47108689605234694, "grad_norm": 1.8030515909194946, "learning_rate": 5.708897564037215e-06, "loss": 0.7968, "step": 13337 }, { "epoch": 0.4711222178560548, "grad_norm": 1.742610216140747, "learning_rate": 5.708331327814934e-06, "loss": 0.7929, "step": 13338 }, { "epoch": 0.4711575396597627, "grad_norm": 0.9821820855140686, "learning_rate": 5.707765082322144e-06, "loss": 0.5813, "step": 13339 }, { "epoch": 0.4711928614634706, "grad_norm": 1.8125680685043335, "learning_rate": 5.707198827566258e-06, "loss": 0.7973, "step": 13340 }, { "epoch": 0.4712281832671785, "grad_norm": 1.6738307476043701, "learning_rate": 5.706632563554687e-06, "loss": 0.785, "step": 13341 }, { "epoch": 0.47126350507088643, "grad_norm": 2.265225887298584, "learning_rate": 5.70606629029484e-06, "loss": 0.8071, "step": 13342 }, { "epoch": 0.47129882687459435, "grad_norm": 1.5509021282196045, "learning_rate": 5.705500007794133e-06, "loss": 0.7818, "step": 13343 }, { "epoch": 0.47133414867830226, "grad_norm": 1.6321152448654175, "learning_rate": 5.704933716059973e-06, "loss": 0.7649, "step": 13344 }, { "epoch": 0.47136947048201017, "grad_norm": 1.753496527671814, "learning_rate": 5.704367415099772e-06, "loss": 0.8541, "step": 13345 }, { "epoch": 0.4714047922857181, "grad_norm": 1.5553778409957886, "learning_rate": 5.703801104920944e-06, "loss": 0.7684, "step": 13346 }, { "epoch": 0.471440114089426, "grad_norm": 1.7850017547607422, "learning_rate": 5.7032347855309e-06, "loss": 0.802, "step": 13347 }, { "epoch": 0.4714754358931339, "grad_norm": 1.6771304607391357, "learning_rate": 5.702668456937051e-06, "loss": 0.8349, "step": 13348 }, { "epoch": 0.4715107576968418, "grad_norm": 1.8000317811965942, "learning_rate": 5.702102119146809e-06, "loss": 0.8083, "step": 13349 }, { "epoch": 0.4715460795005497, "grad_norm": 1.7076669931411743, "learning_rate": 5.701535772167586e-06, "loss": 0.813, "step": 13350 }, { "epoch": 0.4715814013042576, "grad_norm": 1.8028594255447388, "learning_rate": 5.700969416006795e-06, "loss": 0.8009, "step": 13351 }, { "epoch": 0.4716167231079655, "grad_norm": 1.6587533950805664, "learning_rate": 5.700403050671849e-06, "loss": 0.8148, "step": 13352 }, { "epoch": 0.4716520449116734, "grad_norm": 2.127347469329834, "learning_rate": 5.699836676170158e-06, "loss": 0.8007, "step": 13353 }, { "epoch": 0.4716873667153813, "grad_norm": 1.7020292282104492, "learning_rate": 5.699270292509137e-06, "loss": 0.8096, "step": 13354 }, { "epoch": 0.4717226885190892, "grad_norm": 0.9485320448875427, "learning_rate": 5.698703899696198e-06, "loss": 0.6136, "step": 13355 }, { "epoch": 0.47175801032279713, "grad_norm": 1.73954439163208, "learning_rate": 5.698137497738751e-06, "loss": 0.8195, "step": 13356 }, { "epoch": 0.47179333212650504, "grad_norm": 1.7847634553909302, "learning_rate": 5.697571086644216e-06, "loss": 0.7971, "step": 13357 }, { "epoch": 0.47182865393021295, "grad_norm": 1.7753939628601074, "learning_rate": 5.697004666419999e-06, "loss": 0.782, "step": 13358 }, { "epoch": 0.47186397573392086, "grad_norm": 1.6772422790527344, "learning_rate": 5.696438237073517e-06, "loss": 0.7989, "step": 13359 }, { "epoch": 0.47189929753762877, "grad_norm": 1.658050775527954, "learning_rate": 5.695871798612182e-06, "loss": 0.812, "step": 13360 }, { "epoch": 0.4719346193413367, "grad_norm": 1.6854208707809448, "learning_rate": 5.695305351043408e-06, "loss": 0.7997, "step": 13361 }, { "epoch": 0.4719699411450446, "grad_norm": 1.9183052778244019, "learning_rate": 5.6947388943746085e-06, "loss": 0.7914, "step": 13362 }, { "epoch": 0.4720052629487525, "grad_norm": 1.7721831798553467, "learning_rate": 5.694172428613196e-06, "loss": 0.7682, "step": 13363 }, { "epoch": 0.47204058475246036, "grad_norm": 1.6662826538085938, "learning_rate": 5.693605953766586e-06, "loss": 0.8019, "step": 13364 }, { "epoch": 0.47207590655616827, "grad_norm": 2.1121768951416016, "learning_rate": 5.693039469842191e-06, "loss": 0.8062, "step": 13365 }, { "epoch": 0.4721112283598762, "grad_norm": 1.9689617156982422, "learning_rate": 5.6924729768474255e-06, "loss": 0.7827, "step": 13366 }, { "epoch": 0.4721465501635841, "grad_norm": 1.8227709531784058, "learning_rate": 5.691906474789706e-06, "loss": 0.77, "step": 13367 }, { "epoch": 0.472181871967292, "grad_norm": 1.930842399597168, "learning_rate": 5.6913399636764425e-06, "loss": 0.8231, "step": 13368 }, { "epoch": 0.4722171937709999, "grad_norm": 1.8796812295913696, "learning_rate": 5.690773443515052e-06, "loss": 0.7998, "step": 13369 }, { "epoch": 0.4722525155747078, "grad_norm": 2.0556118488311768, "learning_rate": 5.690206914312948e-06, "loss": 0.7874, "step": 13370 }, { "epoch": 0.47228783737841573, "grad_norm": 1.897062063217163, "learning_rate": 5.6896403760775455e-06, "loss": 0.8086, "step": 13371 }, { "epoch": 0.47232315918212364, "grad_norm": 1.9419249296188354, "learning_rate": 5.689073828816261e-06, "loss": 0.828, "step": 13372 }, { "epoch": 0.47235848098583155, "grad_norm": 1.7359918355941772, "learning_rate": 5.688507272536506e-06, "loss": 0.774, "step": 13373 }, { "epoch": 0.47239380278953946, "grad_norm": 1.9187490940093994, "learning_rate": 5.687940707245699e-06, "loss": 0.7775, "step": 13374 }, { "epoch": 0.4724291245932474, "grad_norm": 3.9649205207824707, "learning_rate": 5.687374132951251e-06, "loss": 0.8228, "step": 13375 }, { "epoch": 0.4724644463969553, "grad_norm": 3.1004879474639893, "learning_rate": 5.686807549660582e-06, "loss": 0.8213, "step": 13376 }, { "epoch": 0.47249976820066314, "grad_norm": 1.005124807357788, "learning_rate": 5.686240957381104e-06, "loss": 0.5763, "step": 13377 }, { "epoch": 0.47253509000437105, "grad_norm": 2.289191722869873, "learning_rate": 5.685674356120233e-06, "loss": 0.8077, "step": 13378 }, { "epoch": 0.47257041180807896, "grad_norm": 4.075315952301025, "learning_rate": 5.685107745885385e-06, "loss": 0.8293, "step": 13379 }, { "epoch": 0.47260573361178687, "grad_norm": 2.453746795654297, "learning_rate": 5.6845411266839756e-06, "loss": 0.8468, "step": 13380 }, { "epoch": 0.4726410554154948, "grad_norm": 4.163403034210205, "learning_rate": 5.683974498523421e-06, "loss": 0.8286, "step": 13381 }, { "epoch": 0.4726763772192027, "grad_norm": 4.177765846252441, "learning_rate": 5.683407861411136e-06, "loss": 0.8525, "step": 13382 }, { "epoch": 0.4727116990229106, "grad_norm": 6.733644485473633, "learning_rate": 5.6828412153545385e-06, "loss": 0.7686, "step": 13383 }, { "epoch": 0.4727470208266185, "grad_norm": 3.0042147636413574, "learning_rate": 5.682274560361043e-06, "loss": 0.8002, "step": 13384 }, { "epoch": 0.4727823426303264, "grad_norm": 2.8012006282806396, "learning_rate": 5.681707896438067e-06, "loss": 0.8304, "step": 13385 }, { "epoch": 0.47281766443403433, "grad_norm": 2.227900505065918, "learning_rate": 5.681141223593025e-06, "loss": 0.7867, "step": 13386 }, { "epoch": 0.47285298623774225, "grad_norm": 3.9169564247131348, "learning_rate": 5.680574541833335e-06, "loss": 0.795, "step": 13387 }, { "epoch": 0.47288830804145016, "grad_norm": 3.9593923091888428, "learning_rate": 5.680007851166414e-06, "loss": 0.8043, "step": 13388 }, { "epoch": 0.47292362984515807, "grad_norm": 4.142970085144043, "learning_rate": 5.679441151599677e-06, "loss": 0.8091, "step": 13389 }, { "epoch": 0.4729589516488659, "grad_norm": 4.556446075439453, "learning_rate": 5.678874443140542e-06, "loss": 0.7907, "step": 13390 }, { "epoch": 0.47299427345257383, "grad_norm": 2.4324705600738525, "learning_rate": 5.678307725796427e-06, "loss": 0.8269, "step": 13391 }, { "epoch": 0.47302959525628174, "grad_norm": 2.719291925430298, "learning_rate": 5.6777409995747475e-06, "loss": 0.8284, "step": 13392 }, { "epoch": 0.47306491705998965, "grad_norm": 1.881785273551941, "learning_rate": 5.67717426448292e-06, "loss": 0.7845, "step": 13393 }, { "epoch": 0.47310023886369756, "grad_norm": 2.9738142490386963, "learning_rate": 5.676607520528366e-06, "loss": 0.7956, "step": 13394 }, { "epoch": 0.4731355606674055, "grad_norm": 12.586957931518555, "learning_rate": 5.676040767718497e-06, "loss": 0.7521, "step": 13395 }, { "epoch": 0.4731708824711134, "grad_norm": 3.8098998069763184, "learning_rate": 5.675474006060735e-06, "loss": 0.7813, "step": 13396 }, { "epoch": 0.4732062042748213, "grad_norm": 2.7489171028137207, "learning_rate": 5.674907235562496e-06, "loss": 0.8314, "step": 13397 }, { "epoch": 0.4732415260785292, "grad_norm": 2.2598578929901123, "learning_rate": 5.674340456231196e-06, "loss": 0.7641, "step": 13398 }, { "epoch": 0.4732768478822371, "grad_norm": 2.247615098953247, "learning_rate": 5.673773668074257e-06, "loss": 0.826, "step": 13399 }, { "epoch": 0.47331216968594503, "grad_norm": 3.014721393585205, "learning_rate": 5.673206871099094e-06, "loss": 0.8528, "step": 13400 }, { "epoch": 0.47334749148965294, "grad_norm": 4.956636428833008, "learning_rate": 5.6726400653131266e-06, "loss": 0.7706, "step": 13401 }, { "epoch": 0.47338281329336085, "grad_norm": 3.1627371311187744, "learning_rate": 5.672073250723773e-06, "loss": 0.7989, "step": 13402 }, { "epoch": 0.4734181350970687, "grad_norm": 4.369777202606201, "learning_rate": 5.671506427338449e-06, "loss": 0.81, "step": 13403 }, { "epoch": 0.4734534569007766, "grad_norm": 3.0598368644714355, "learning_rate": 5.6709395951645784e-06, "loss": 0.8114, "step": 13404 }, { "epoch": 0.4734887787044845, "grad_norm": 3.3041141033172607, "learning_rate": 5.670372754209575e-06, "loss": 0.7774, "step": 13405 }, { "epoch": 0.47352410050819244, "grad_norm": 3.883984327316284, "learning_rate": 5.669805904480857e-06, "loss": 0.7821, "step": 13406 }, { "epoch": 0.47355942231190035, "grad_norm": 2.8496456146240234, "learning_rate": 5.669239045985848e-06, "loss": 0.8133, "step": 13407 }, { "epoch": 0.47359474411560826, "grad_norm": 2.831557512283325, "learning_rate": 5.668672178731962e-06, "loss": 0.8229, "step": 13408 }, { "epoch": 0.47363006591931617, "grad_norm": 2.5676825046539307, "learning_rate": 5.668105302726622e-06, "loss": 0.7838, "step": 13409 }, { "epoch": 0.4736653877230241, "grad_norm": 3.301055431365967, "learning_rate": 5.667538417977244e-06, "loss": 0.8271, "step": 13410 }, { "epoch": 0.473700709526732, "grad_norm": 2.0679826736450195, "learning_rate": 5.666971524491249e-06, "loss": 0.8049, "step": 13411 }, { "epoch": 0.4737360313304399, "grad_norm": 2.1316864490509033, "learning_rate": 5.666404622276057e-06, "loss": 0.7977, "step": 13412 }, { "epoch": 0.4737713531341478, "grad_norm": 1.9863897562026978, "learning_rate": 5.665837711339086e-06, "loss": 0.8024, "step": 13413 }, { "epoch": 0.4738066749378557, "grad_norm": 2.0081005096435547, "learning_rate": 5.665270791687756e-06, "loss": 0.7982, "step": 13414 }, { "epoch": 0.47384199674156363, "grad_norm": 2.0531435012817383, "learning_rate": 5.6647038633294885e-06, "loss": 0.7789, "step": 13415 }, { "epoch": 0.47387731854527154, "grad_norm": 2.553582191467285, "learning_rate": 5.6641369262716985e-06, "loss": 0.8126, "step": 13416 }, { "epoch": 0.4739126403489794, "grad_norm": 2.49666428565979, "learning_rate": 5.6635699805218115e-06, "loss": 0.8042, "step": 13417 }, { "epoch": 0.4739479621526873, "grad_norm": 2.1081414222717285, "learning_rate": 5.663003026087246e-06, "loss": 0.767, "step": 13418 }, { "epoch": 0.4739832839563952, "grad_norm": 1.9469598531723022, "learning_rate": 5.662436062975419e-06, "loss": 0.8134, "step": 13419 }, { "epoch": 0.47401860576010313, "grad_norm": 2.821538209915161, "learning_rate": 5.661869091193754e-06, "loss": 0.7623, "step": 13420 }, { "epoch": 0.47405392756381104, "grad_norm": 1.9904649257659912, "learning_rate": 5.661302110749674e-06, "loss": 0.7753, "step": 13421 }, { "epoch": 0.47408924936751895, "grad_norm": 2.3346219062805176, "learning_rate": 5.660735121650594e-06, "loss": 0.8193, "step": 13422 }, { "epoch": 0.47412457117122686, "grad_norm": 2.964367151260376, "learning_rate": 5.660168123903937e-06, "loss": 0.7849, "step": 13423 }, { "epoch": 0.47415989297493477, "grad_norm": 2.403606653213501, "learning_rate": 5.659601117517122e-06, "loss": 0.8077, "step": 13424 }, { "epoch": 0.4741952147786427, "grad_norm": 2.441351890563965, "learning_rate": 5.659034102497574e-06, "loss": 0.782, "step": 13425 }, { "epoch": 0.4742305365823506, "grad_norm": 5.81049919128418, "learning_rate": 5.658467078852709e-06, "loss": 0.8042, "step": 13426 }, { "epoch": 0.4742658583860585, "grad_norm": 2.49190354347229, "learning_rate": 5.6579000465899525e-06, "loss": 0.807, "step": 13427 }, { "epoch": 0.4743011801897664, "grad_norm": 2.327453851699829, "learning_rate": 5.657333005716724e-06, "loss": 0.7985, "step": 13428 }, { "epoch": 0.4743365019934743, "grad_norm": 4.138235092163086, "learning_rate": 5.656765956240443e-06, "loss": 0.8004, "step": 13429 }, { "epoch": 0.4743718237971822, "grad_norm": 4.001800060272217, "learning_rate": 5.656198898168534e-06, "loss": 0.8189, "step": 13430 }, { "epoch": 0.4744071456008901, "grad_norm": 2.779937744140625, "learning_rate": 5.655631831508419e-06, "loss": 0.8363, "step": 13431 }, { "epoch": 0.474442467404598, "grad_norm": 2.881251335144043, "learning_rate": 5.655064756267516e-06, "loss": 0.8011, "step": 13432 }, { "epoch": 0.4744777892083059, "grad_norm": 1.9715667963027954, "learning_rate": 5.654497672453247e-06, "loss": 0.8021, "step": 13433 }, { "epoch": 0.4745131110120138, "grad_norm": 3.079272747039795, "learning_rate": 5.653930580073037e-06, "loss": 0.7971, "step": 13434 }, { "epoch": 0.47454843281572173, "grad_norm": 2.781128168106079, "learning_rate": 5.6533634791343065e-06, "loss": 0.8111, "step": 13435 }, { "epoch": 0.47458375461942964, "grad_norm": 1.752010464668274, "learning_rate": 5.652796369644478e-06, "loss": 0.7855, "step": 13436 }, { "epoch": 0.47461907642313755, "grad_norm": 1.527247428894043, "learning_rate": 5.652229251610972e-06, "loss": 0.7841, "step": 13437 }, { "epoch": 0.47465439822684546, "grad_norm": 1.9212934970855713, "learning_rate": 5.651662125041214e-06, "loss": 0.7975, "step": 13438 }, { "epoch": 0.4746897200305534, "grad_norm": 1.5768065452575684, "learning_rate": 5.6510949899426235e-06, "loss": 0.7813, "step": 13439 }, { "epoch": 0.4747250418342613, "grad_norm": 2.6033599376678467, "learning_rate": 5.650527846322625e-06, "loss": 0.7597, "step": 13440 }, { "epoch": 0.4747603636379692, "grad_norm": 1.8569016456604004, "learning_rate": 5.64996069418864e-06, "loss": 0.7836, "step": 13441 }, { "epoch": 0.4747956854416771, "grad_norm": 3.5976760387420654, "learning_rate": 5.64939353354809e-06, "loss": 0.8, "step": 13442 }, { "epoch": 0.47483100724538496, "grad_norm": 2.0386440753936768, "learning_rate": 5.6488263644084015e-06, "loss": 0.7884, "step": 13443 }, { "epoch": 0.4748663290490929, "grad_norm": 2.2949280738830566, "learning_rate": 5.648259186776996e-06, "loss": 0.8002, "step": 13444 }, { "epoch": 0.4749016508528008, "grad_norm": 1.8979651927947998, "learning_rate": 5.647692000661294e-06, "loss": 0.8081, "step": 13445 }, { "epoch": 0.4749369726565087, "grad_norm": 2.2805309295654297, "learning_rate": 5.6471248060687216e-06, "loss": 0.7822, "step": 13446 }, { "epoch": 0.4749722944602166, "grad_norm": 5.321018695831299, "learning_rate": 5.646557603006702e-06, "loss": 0.7556, "step": 13447 }, { "epoch": 0.4750076162639245, "grad_norm": 2.7123303413391113, "learning_rate": 5.6459903914826565e-06, "loss": 0.8082, "step": 13448 }, { "epoch": 0.4750429380676324, "grad_norm": 5.334936141967773, "learning_rate": 5.645423171504012e-06, "loss": 0.8175, "step": 13449 }, { "epoch": 0.47507825987134034, "grad_norm": 2.6601178646087646, "learning_rate": 5.644855943078189e-06, "loss": 0.8115, "step": 13450 }, { "epoch": 0.47511358167504825, "grad_norm": 5.586535930633545, "learning_rate": 5.6442887062126135e-06, "loss": 0.7534, "step": 13451 }, { "epoch": 0.47514890347875616, "grad_norm": 0.9206470251083374, "learning_rate": 5.6437214609147085e-06, "loss": 0.5783, "step": 13452 }, { "epoch": 0.47518422528246407, "grad_norm": 2.7905185222625732, "learning_rate": 5.643154207191898e-06, "loss": 0.8302, "step": 13453 }, { "epoch": 0.475219547086172, "grad_norm": 2.0535738468170166, "learning_rate": 5.642586945051606e-06, "loss": 0.7897, "step": 13454 }, { "epoch": 0.4752548688898799, "grad_norm": 4.552063941955566, "learning_rate": 5.642019674501257e-06, "loss": 0.8267, "step": 13455 }, { "epoch": 0.47529019069358774, "grad_norm": 2.0046303272247314, "learning_rate": 5.641452395548275e-06, "loss": 0.8143, "step": 13456 }, { "epoch": 0.47532551249729565, "grad_norm": 1.976172685623169, "learning_rate": 5.640885108200084e-06, "loss": 0.8007, "step": 13457 }, { "epoch": 0.47536083430100357, "grad_norm": 2.2415823936462402, "learning_rate": 5.640317812464109e-06, "loss": 0.836, "step": 13458 }, { "epoch": 0.4753961561047115, "grad_norm": 2.2189600467681885, "learning_rate": 5.639750508347777e-06, "loss": 0.8146, "step": 13459 }, { "epoch": 0.4754314779084194, "grad_norm": 2.3720006942749023, "learning_rate": 5.63918319585851e-06, "loss": 0.8231, "step": 13460 }, { "epoch": 0.4754667997121273, "grad_norm": 2.13107967376709, "learning_rate": 5.638615875003733e-06, "loss": 0.8058, "step": 13461 }, { "epoch": 0.4755021215158352, "grad_norm": 2.1462135314941406, "learning_rate": 5.638048545790871e-06, "loss": 0.8351, "step": 13462 }, { "epoch": 0.4755374433195431, "grad_norm": 2.2767598628997803, "learning_rate": 5.637481208227348e-06, "loss": 0.8025, "step": 13463 }, { "epoch": 0.47557276512325103, "grad_norm": 3.8133394718170166, "learning_rate": 5.636913862320593e-06, "loss": 0.7812, "step": 13464 }, { "epoch": 0.47560808692695894, "grad_norm": 2.5641210079193115, "learning_rate": 5.636346508078029e-06, "loss": 0.771, "step": 13465 }, { "epoch": 0.47564340873066685, "grad_norm": 3.2345802783966064, "learning_rate": 5.635779145507081e-06, "loss": 0.7705, "step": 13466 }, { "epoch": 0.47567873053437476, "grad_norm": 2.3408751487731934, "learning_rate": 5.635211774615175e-06, "loss": 0.8229, "step": 13467 }, { "epoch": 0.47571405233808267, "grad_norm": 2.665703296661377, "learning_rate": 5.634644395409737e-06, "loss": 0.8307, "step": 13468 }, { "epoch": 0.4757493741417905, "grad_norm": 2.2870421409606934, "learning_rate": 5.634077007898193e-06, "loss": 0.8006, "step": 13469 }, { "epoch": 0.47578469594549844, "grad_norm": 2.5443532466888428, "learning_rate": 5.633509612087969e-06, "loss": 0.8394, "step": 13470 }, { "epoch": 0.47582001774920635, "grad_norm": 2.1082868576049805, "learning_rate": 5.632942207986487e-06, "loss": 0.7901, "step": 13471 }, { "epoch": 0.47585533955291426, "grad_norm": 2.746680498123169, "learning_rate": 5.632374795601179e-06, "loss": 0.7696, "step": 13472 }, { "epoch": 0.47589066135662217, "grad_norm": 4.215641498565674, "learning_rate": 5.631807374939469e-06, "loss": 0.7971, "step": 13473 }, { "epoch": 0.4759259831603301, "grad_norm": 6.723684787750244, "learning_rate": 5.63123994600878e-06, "loss": 0.7982, "step": 13474 }, { "epoch": 0.475961304964038, "grad_norm": 2.6744229793548584, "learning_rate": 5.630672508816544e-06, "loss": 0.8116, "step": 13475 }, { "epoch": 0.4759966267677459, "grad_norm": 3.4440975189208984, "learning_rate": 5.630105063370184e-06, "loss": 0.7965, "step": 13476 }, { "epoch": 0.4760319485714538, "grad_norm": 3.395171880722046, "learning_rate": 5.629537609677126e-06, "loss": 0.8374, "step": 13477 }, { "epoch": 0.4760672703751617, "grad_norm": 2.338303327560425, "learning_rate": 5.6289701477448e-06, "loss": 0.8021, "step": 13478 }, { "epoch": 0.47610259217886963, "grad_norm": 2.182103395462036, "learning_rate": 5.62840267758063e-06, "loss": 0.7671, "step": 13479 }, { "epoch": 0.47613791398257754, "grad_norm": 2.621777057647705, "learning_rate": 5.627835199192044e-06, "loss": 0.8212, "step": 13480 }, { "epoch": 0.47617323578628545, "grad_norm": 4.72489070892334, "learning_rate": 5.62726771258647e-06, "loss": 0.8264, "step": 13481 }, { "epoch": 0.4762085575899933, "grad_norm": 5.557137966156006, "learning_rate": 5.626700217771331e-06, "loss": 0.7759, "step": 13482 }, { "epoch": 0.4762438793937012, "grad_norm": 2.691469669342041, "learning_rate": 5.62613271475406e-06, "loss": 0.8145, "step": 13483 }, { "epoch": 0.47627920119740913, "grad_norm": 2.6353039741516113, "learning_rate": 5.625565203542082e-06, "loss": 0.7976, "step": 13484 }, { "epoch": 0.47631452300111704, "grad_norm": 2.822183847427368, "learning_rate": 5.6249976841428225e-06, "loss": 0.7834, "step": 13485 }, { "epoch": 0.47634984480482495, "grad_norm": 7.80378532409668, "learning_rate": 5.624430156563712e-06, "loss": 0.8042, "step": 13486 }, { "epoch": 0.47638516660853286, "grad_norm": 3.9525771141052246, "learning_rate": 5.6238626208121775e-06, "loss": 0.8299, "step": 13487 }, { "epoch": 0.4764204884122408, "grad_norm": 3.061434030532837, "learning_rate": 5.623295076895645e-06, "loss": 0.8204, "step": 13488 }, { "epoch": 0.4764558102159487, "grad_norm": 3.9351165294647217, "learning_rate": 5.622727524821545e-06, "loss": 0.7633, "step": 13489 }, { "epoch": 0.4764911320196566, "grad_norm": 3.3450393676757812, "learning_rate": 5.622159964597302e-06, "loss": 0.77, "step": 13490 }, { "epoch": 0.4765264538233645, "grad_norm": 7.855093955993652, "learning_rate": 5.6215923962303485e-06, "loss": 0.8085, "step": 13491 }, { "epoch": 0.4765617756270724, "grad_norm": 2.425615072250366, "learning_rate": 5.621024819728109e-06, "loss": 0.7773, "step": 13492 }, { "epoch": 0.4765970974307803, "grad_norm": 14.710315704345703, "learning_rate": 5.620457235098015e-06, "loss": 0.786, "step": 13493 }, { "epoch": 0.47663241923448824, "grad_norm": 2.8330488204956055, "learning_rate": 5.619889642347492e-06, "loss": 0.7798, "step": 13494 }, { "epoch": 0.4766677410381961, "grad_norm": 3.8943114280700684, "learning_rate": 5.61932204148397e-06, "loss": 0.784, "step": 13495 }, { "epoch": 0.476703062841904, "grad_norm": 2.306597948074341, "learning_rate": 5.618754432514879e-06, "loss": 0.7909, "step": 13496 }, { "epoch": 0.4767383846456119, "grad_norm": 3.0830113887786865, "learning_rate": 5.618186815447645e-06, "loss": 0.8331, "step": 13497 }, { "epoch": 0.4767737064493198, "grad_norm": 2.2376015186309814, "learning_rate": 5.6176191902896986e-06, "loss": 0.8157, "step": 13498 }, { "epoch": 0.47680902825302773, "grad_norm": 3.5637047290802, "learning_rate": 5.617051557048468e-06, "loss": 0.831, "step": 13499 }, { "epoch": 0.47684435005673564, "grad_norm": 2.815565824508667, "learning_rate": 5.616483915731382e-06, "loss": 0.7877, "step": 13500 }, { "epoch": 0.47687967186044355, "grad_norm": 1.7453898191452026, "learning_rate": 5.615916266345871e-06, "loss": 0.7718, "step": 13501 }, { "epoch": 0.47691499366415147, "grad_norm": 2.3321666717529297, "learning_rate": 5.615348608899365e-06, "loss": 0.7904, "step": 13502 }, { "epoch": 0.4769503154678594, "grad_norm": 2.251613140106201, "learning_rate": 5.61478094339929e-06, "loss": 0.7947, "step": 13503 }, { "epoch": 0.4769856372715673, "grad_norm": 2.078519821166992, "learning_rate": 5.614213269853078e-06, "loss": 0.7667, "step": 13504 }, { "epoch": 0.4770209590752752, "grad_norm": 1.6295580863952637, "learning_rate": 5.613645588268159e-06, "loss": 0.7818, "step": 13505 }, { "epoch": 0.4770562808789831, "grad_norm": 1.618360161781311, "learning_rate": 5.613077898651963e-06, "loss": 0.7852, "step": 13506 }, { "epoch": 0.477091602682691, "grad_norm": 1.6594996452331543, "learning_rate": 5.612510201011917e-06, "loss": 0.7688, "step": 13507 }, { "epoch": 0.4771269244863989, "grad_norm": 1.7604117393493652, "learning_rate": 5.611942495355452e-06, "loss": 0.7956, "step": 13508 }, { "epoch": 0.4771622462901068, "grad_norm": 1.7520315647125244, "learning_rate": 5.61137478169e-06, "loss": 0.8336, "step": 13509 }, { "epoch": 0.4771975680938147, "grad_norm": 1.7148092985153198, "learning_rate": 5.6108070600229905e-06, "loss": 0.7629, "step": 13510 }, { "epoch": 0.4772328898975226, "grad_norm": 2.7032673358917236, "learning_rate": 5.610239330361851e-06, "loss": 0.8005, "step": 13511 }, { "epoch": 0.4772682117012305, "grad_norm": 1.5662682056427002, "learning_rate": 5.609671592714014e-06, "loss": 0.8078, "step": 13512 }, { "epoch": 0.4773035335049384, "grad_norm": 4.039209365844727, "learning_rate": 5.609103847086911e-06, "loss": 0.8093, "step": 13513 }, { "epoch": 0.47733885530864634, "grad_norm": 1.922263264656067, "learning_rate": 5.608536093487971e-06, "loss": 0.7595, "step": 13514 }, { "epoch": 0.47737417711235425, "grad_norm": 1.5793614387512207, "learning_rate": 5.607968331924626e-06, "loss": 0.7979, "step": 13515 }, { "epoch": 0.47740949891606216, "grad_norm": 1.6017265319824219, "learning_rate": 5.607400562404304e-06, "loss": 0.8103, "step": 13516 }, { "epoch": 0.47744482071977007, "grad_norm": 1.5112673044204712, "learning_rate": 5.606832784934438e-06, "loss": 0.7683, "step": 13517 }, { "epoch": 0.477480142523478, "grad_norm": 1.5600169897079468, "learning_rate": 5.606264999522459e-06, "loss": 0.7874, "step": 13518 }, { "epoch": 0.4775154643271859, "grad_norm": 1.7857800722122192, "learning_rate": 5.605697206175796e-06, "loss": 0.7891, "step": 13519 }, { "epoch": 0.4775507861308938, "grad_norm": 1.6893761157989502, "learning_rate": 5.6051294049018845e-06, "loss": 0.782, "step": 13520 }, { "epoch": 0.47758610793460166, "grad_norm": 1.5269564390182495, "learning_rate": 5.604561595708151e-06, "loss": 0.7815, "step": 13521 }, { "epoch": 0.47762142973830957, "grad_norm": 1.5506256818771362, "learning_rate": 5.6039937786020305e-06, "loss": 0.8194, "step": 13522 }, { "epoch": 0.4776567515420175, "grad_norm": 1.8930987119674683, "learning_rate": 5.603425953590953e-06, "loss": 0.7654, "step": 13523 }, { "epoch": 0.4776920733457254, "grad_norm": 1.8059298992156982, "learning_rate": 5.602858120682349e-06, "loss": 0.8025, "step": 13524 }, { "epoch": 0.4777273951494333, "grad_norm": 1.5852943658828735, "learning_rate": 5.6022902798836524e-06, "loss": 0.7752, "step": 13525 }, { "epoch": 0.4777627169531412, "grad_norm": 1.8511486053466797, "learning_rate": 5.6017224312022925e-06, "loss": 0.7994, "step": 13526 }, { "epoch": 0.4777980387568491, "grad_norm": 1.808424472808838, "learning_rate": 5.601154574645703e-06, "loss": 0.7968, "step": 13527 }, { "epoch": 0.47783336056055703, "grad_norm": 1.7142009735107422, "learning_rate": 5.600586710221317e-06, "loss": 0.811, "step": 13528 }, { "epoch": 0.47786868236426494, "grad_norm": 1.6530537605285645, "learning_rate": 5.600018837936562e-06, "loss": 0.7807, "step": 13529 }, { "epoch": 0.47790400416797285, "grad_norm": 1.605708360671997, "learning_rate": 5.599450957798874e-06, "loss": 0.8159, "step": 13530 }, { "epoch": 0.47793932597168076, "grad_norm": 1.5442184209823608, "learning_rate": 5.598883069815686e-06, "loss": 0.8081, "step": 13531 }, { "epoch": 0.4779746477753887, "grad_norm": 1.906326413154602, "learning_rate": 5.598315173994428e-06, "loss": 0.8273, "step": 13532 }, { "epoch": 0.4780099695790966, "grad_norm": 1.525760531425476, "learning_rate": 5.597747270342535e-06, "loss": 0.7644, "step": 13533 }, { "epoch": 0.47804529138280444, "grad_norm": 1.5890297889709473, "learning_rate": 5.5971793588674365e-06, "loss": 0.7392, "step": 13534 }, { "epoch": 0.47808061318651235, "grad_norm": 1.6452754735946655, "learning_rate": 5.596611439576567e-06, "loss": 0.7927, "step": 13535 }, { "epoch": 0.47811593499022026, "grad_norm": 1.5197209119796753, "learning_rate": 5.59604351247736e-06, "loss": 0.7576, "step": 13536 }, { "epoch": 0.47815125679392817, "grad_norm": 1.9135422706604004, "learning_rate": 5.595475577577246e-06, "loss": 0.7668, "step": 13537 }, { "epoch": 0.4781865785976361, "grad_norm": 1.7805119752883911, "learning_rate": 5.59490763488366e-06, "loss": 0.8021, "step": 13538 }, { "epoch": 0.478221900401344, "grad_norm": 1.5770114660263062, "learning_rate": 5.594339684404037e-06, "loss": 0.8008, "step": 13539 }, { "epoch": 0.4782572222050519, "grad_norm": 1.7614006996154785, "learning_rate": 5.593771726145805e-06, "loss": 0.7896, "step": 13540 }, { "epoch": 0.4782925440087598, "grad_norm": 1.966564655303955, "learning_rate": 5.593203760116401e-06, "loss": 0.8594, "step": 13541 }, { "epoch": 0.4783278658124677, "grad_norm": 2.4103379249572754, "learning_rate": 5.592635786323259e-06, "loss": 0.7924, "step": 13542 }, { "epoch": 0.47836318761617563, "grad_norm": 2.0099005699157715, "learning_rate": 5.592067804773811e-06, "loss": 0.7977, "step": 13543 }, { "epoch": 0.47839850941988354, "grad_norm": 1.6756640672683716, "learning_rate": 5.591499815475489e-06, "loss": 0.7728, "step": 13544 }, { "epoch": 0.47843383122359145, "grad_norm": 1.7125601768493652, "learning_rate": 5.59093181843573e-06, "loss": 0.7903, "step": 13545 }, { "epoch": 0.47846915302729937, "grad_norm": 1.9115420579910278, "learning_rate": 5.590363813661966e-06, "loss": 0.8044, "step": 13546 }, { "epoch": 0.4785044748310072, "grad_norm": 3.708315372467041, "learning_rate": 5.589795801161632e-06, "loss": 0.8363, "step": 13547 }, { "epoch": 0.47853979663471513, "grad_norm": 1.608546495437622, "learning_rate": 5.58922778094216e-06, "loss": 0.7783, "step": 13548 }, { "epoch": 0.47857511843842304, "grad_norm": 1.5113120079040527, "learning_rate": 5.588659753010987e-06, "loss": 0.79, "step": 13549 }, { "epoch": 0.47861044024213095, "grad_norm": 1.6086976528167725, "learning_rate": 5.588091717375544e-06, "loss": 0.7963, "step": 13550 }, { "epoch": 0.47864576204583886, "grad_norm": 1.6472582817077637, "learning_rate": 5.58752367404327e-06, "loss": 0.8165, "step": 13551 }, { "epoch": 0.4786810838495468, "grad_norm": 1.779245376586914, "learning_rate": 5.586955623021596e-06, "loss": 0.8251, "step": 13552 }, { "epoch": 0.4787164056532547, "grad_norm": 1.5433201789855957, "learning_rate": 5.586387564317956e-06, "loss": 0.758, "step": 13553 }, { "epoch": 0.4787517274569626, "grad_norm": 1.6967206001281738, "learning_rate": 5.585819497939786e-06, "loss": 0.8047, "step": 13554 }, { "epoch": 0.4787870492606705, "grad_norm": 1.5438084602355957, "learning_rate": 5.5852514238945195e-06, "loss": 0.7923, "step": 13555 }, { "epoch": 0.4788223710643784, "grad_norm": 1.5884376764297485, "learning_rate": 5.584683342189594e-06, "loss": 0.8095, "step": 13556 }, { "epoch": 0.4788576928680863, "grad_norm": 1.6920851469039917, "learning_rate": 5.584115252832443e-06, "loss": 0.7989, "step": 13557 }, { "epoch": 0.47889301467179424, "grad_norm": 1.6905386447906494, "learning_rate": 5.5835471558305e-06, "loss": 0.8262, "step": 13558 }, { "epoch": 0.47892833647550215, "grad_norm": 1.7417851686477661, "learning_rate": 5.582979051191202e-06, "loss": 0.8287, "step": 13559 }, { "epoch": 0.47896365827921, "grad_norm": 1.5311304330825806, "learning_rate": 5.582410938921985e-06, "loss": 0.8114, "step": 13560 }, { "epoch": 0.4789989800829179, "grad_norm": 1.9608556032180786, "learning_rate": 5.581842819030282e-06, "loss": 0.8145, "step": 13561 }, { "epoch": 0.4790343018866258, "grad_norm": 1.482114553451538, "learning_rate": 5.581274691523529e-06, "loss": 0.7789, "step": 13562 }, { "epoch": 0.47906962369033373, "grad_norm": 1.6249911785125732, "learning_rate": 5.580706556409164e-06, "loss": 0.8441, "step": 13563 }, { "epoch": 0.47910494549404165, "grad_norm": 1.7150322198867798, "learning_rate": 5.580138413694619e-06, "loss": 0.7674, "step": 13564 }, { "epoch": 0.47914026729774956, "grad_norm": 1.536339521408081, "learning_rate": 5.579570263387331e-06, "loss": 0.8049, "step": 13565 }, { "epoch": 0.47917558910145747, "grad_norm": 1.7876938581466675, "learning_rate": 5.579002105494737e-06, "loss": 0.8073, "step": 13566 }, { "epoch": 0.4792109109051654, "grad_norm": 1.7116873264312744, "learning_rate": 5.578433940024274e-06, "loss": 0.8342, "step": 13567 }, { "epoch": 0.4792462327088733, "grad_norm": 1.8483612537384033, "learning_rate": 5.577865766983375e-06, "loss": 0.8096, "step": 13568 }, { "epoch": 0.4792815545125812, "grad_norm": 1.5293604135513306, "learning_rate": 5.577297586379476e-06, "loss": 0.7862, "step": 13569 }, { "epoch": 0.4793168763162891, "grad_norm": 3.2001497745513916, "learning_rate": 5.576729398220018e-06, "loss": 0.771, "step": 13570 }, { "epoch": 0.479352198119997, "grad_norm": 1.6952928304672241, "learning_rate": 5.576161202512431e-06, "loss": 0.8066, "step": 13571 }, { "epoch": 0.47938751992370493, "grad_norm": 1.7288379669189453, "learning_rate": 5.575592999264157e-06, "loss": 0.8089, "step": 13572 }, { "epoch": 0.4794228417274128, "grad_norm": 1.6853399276733398, "learning_rate": 5.575024788482628e-06, "loss": 0.8012, "step": 13573 }, { "epoch": 0.4794581635311207, "grad_norm": 1.5905866622924805, "learning_rate": 5.574456570175283e-06, "loss": 0.7895, "step": 13574 }, { "epoch": 0.4794934853348286, "grad_norm": 1.549227237701416, "learning_rate": 5.573888344349558e-06, "loss": 0.7832, "step": 13575 }, { "epoch": 0.4795288071385365, "grad_norm": 1.7029762268066406, "learning_rate": 5.573320111012892e-06, "loss": 0.8502, "step": 13576 }, { "epoch": 0.4795641289422444, "grad_norm": 1.6100984811782837, "learning_rate": 5.572751870172718e-06, "loss": 0.7832, "step": 13577 }, { "epoch": 0.47959945074595234, "grad_norm": 1.5495340824127197, "learning_rate": 5.572183621836477e-06, "loss": 0.7711, "step": 13578 }, { "epoch": 0.47963477254966025, "grad_norm": 1.5634857416152954, "learning_rate": 5.571615366011604e-06, "loss": 0.7988, "step": 13579 }, { "epoch": 0.47967009435336816, "grad_norm": 1.5462257862091064, "learning_rate": 5.571047102705537e-06, "loss": 0.7843, "step": 13580 }, { "epoch": 0.47970541615707607, "grad_norm": 1.905153512954712, "learning_rate": 5.5704788319257116e-06, "loss": 0.7708, "step": 13581 }, { "epoch": 0.479740737960784, "grad_norm": 1.6238553524017334, "learning_rate": 5.569910553679566e-06, "loss": 0.802, "step": 13582 }, { "epoch": 0.4797760597644919, "grad_norm": 1.754704236984253, "learning_rate": 5.5693422679745405e-06, "loss": 0.8325, "step": 13583 }, { "epoch": 0.4798113815681998, "grad_norm": 1.6072001457214355, "learning_rate": 5.568773974818067e-06, "loss": 0.8016, "step": 13584 }, { "epoch": 0.4798467033719077, "grad_norm": 1.5199514627456665, "learning_rate": 5.5682056742175896e-06, "loss": 0.7617, "step": 13585 }, { "epoch": 0.47988202517561557, "grad_norm": 1.6307990550994873, "learning_rate": 5.567637366180543e-06, "loss": 0.7537, "step": 13586 }, { "epoch": 0.4799173469793235, "grad_norm": 1.6908587217330933, "learning_rate": 5.567069050714363e-06, "loss": 0.8148, "step": 13587 }, { "epoch": 0.4799526687830314, "grad_norm": 1.7034651041030884, "learning_rate": 5.566500727826493e-06, "loss": 0.8107, "step": 13588 }, { "epoch": 0.4799879905867393, "grad_norm": 2.246816396713257, "learning_rate": 5.565932397524365e-06, "loss": 0.8163, "step": 13589 }, { "epoch": 0.4800233123904472, "grad_norm": 1.7289122343063354, "learning_rate": 5.565364059815422e-06, "loss": 0.8403, "step": 13590 }, { "epoch": 0.4800586341941551, "grad_norm": 2.1241397857666016, "learning_rate": 5.5647957147071005e-06, "loss": 0.8034, "step": 13591 }, { "epoch": 0.48009395599786303, "grad_norm": 1.709250569343567, "learning_rate": 5.564227362206838e-06, "loss": 0.8221, "step": 13592 }, { "epoch": 0.48012927780157094, "grad_norm": 2.7797317504882812, "learning_rate": 5.563659002322074e-06, "loss": 0.8724, "step": 13593 }, { "epoch": 0.48016459960527885, "grad_norm": 2.617499351501465, "learning_rate": 5.563090635060248e-06, "loss": 0.7815, "step": 13594 }, { "epoch": 0.48019992140898676, "grad_norm": 2.576718330383301, "learning_rate": 5.5625222604287956e-06, "loss": 0.782, "step": 13595 }, { "epoch": 0.4802352432126947, "grad_norm": 2.055368661880493, "learning_rate": 5.561953878435159e-06, "loss": 0.7975, "step": 13596 }, { "epoch": 0.4802705650164026, "grad_norm": 2.086132764816284, "learning_rate": 5.561385489086774e-06, "loss": 0.8689, "step": 13597 }, { "epoch": 0.4803058868201105, "grad_norm": 1.6359736919403076, "learning_rate": 5.560817092391084e-06, "loss": 0.8234, "step": 13598 }, { "epoch": 0.48034120862381835, "grad_norm": 1.5676310062408447, "learning_rate": 5.560248688355524e-06, "loss": 0.7992, "step": 13599 }, { "epoch": 0.48037653042752626, "grad_norm": 5.092150688171387, "learning_rate": 5.559680276987533e-06, "loss": 0.7866, "step": 13600 }, { "epoch": 0.48041185223123417, "grad_norm": 1.6418495178222656, "learning_rate": 5.559111858294554e-06, "loss": 0.7944, "step": 13601 }, { "epoch": 0.4804471740349421, "grad_norm": 1.6685231924057007, "learning_rate": 5.558543432284024e-06, "loss": 0.8153, "step": 13602 }, { "epoch": 0.48048249583865, "grad_norm": 1.726375937461853, "learning_rate": 5.5579749989633815e-06, "loss": 0.8106, "step": 13603 }, { "epoch": 0.4805178176423579, "grad_norm": 1.6823042631149292, "learning_rate": 5.5574065583400674e-06, "loss": 0.7608, "step": 13604 }, { "epoch": 0.4805531394460658, "grad_norm": 1.6053245067596436, "learning_rate": 5.556838110421521e-06, "loss": 0.8021, "step": 13605 }, { "epoch": 0.4805884612497737, "grad_norm": 1.6194002628326416, "learning_rate": 5.556269655215182e-06, "loss": 0.8341, "step": 13606 }, { "epoch": 0.48062378305348163, "grad_norm": 1.7741254568099976, "learning_rate": 5.555701192728491e-06, "loss": 0.8343, "step": 13607 }, { "epoch": 0.48065910485718955, "grad_norm": 1.9022142887115479, "learning_rate": 5.5551327229688866e-06, "loss": 0.7927, "step": 13608 }, { "epoch": 0.48069442666089746, "grad_norm": 1.545577883720398, "learning_rate": 5.55456424594381e-06, "loss": 0.7758, "step": 13609 }, { "epoch": 0.48072974846460537, "grad_norm": 1.6918319463729858, "learning_rate": 5.5539957616607e-06, "loss": 0.7913, "step": 13610 }, { "epoch": 0.4807650702683133, "grad_norm": 1.5856820344924927, "learning_rate": 5.5534272701269974e-06, "loss": 0.7746, "step": 13611 }, { "epoch": 0.48080039207202113, "grad_norm": 1.7809982299804688, "learning_rate": 5.552858771350143e-06, "loss": 0.8243, "step": 13612 }, { "epoch": 0.48083571387572904, "grad_norm": 1.742913842201233, "learning_rate": 5.552290265337577e-06, "loss": 0.8089, "step": 13613 }, { "epoch": 0.48087103567943695, "grad_norm": 1.5704044103622437, "learning_rate": 5.55172175209674e-06, "loss": 0.8117, "step": 13614 }, { "epoch": 0.48090635748314486, "grad_norm": 1.634759783744812, "learning_rate": 5.551153231635074e-06, "loss": 0.8175, "step": 13615 }, { "epoch": 0.4809416792868528, "grad_norm": 1.6572315692901611, "learning_rate": 5.550584703960016e-06, "loss": 0.7519, "step": 13616 }, { "epoch": 0.4809770010905607, "grad_norm": 2.873250961303711, "learning_rate": 5.55001616907901e-06, "loss": 0.809, "step": 13617 }, { "epoch": 0.4810123228942686, "grad_norm": 1.751240611076355, "learning_rate": 5.549447626999494e-06, "loss": 0.8041, "step": 13618 }, { "epoch": 0.4810476446979765, "grad_norm": 1.7796109914779663, "learning_rate": 5.548879077728912e-06, "loss": 0.8091, "step": 13619 }, { "epoch": 0.4810829665016844, "grad_norm": 1.5876374244689941, "learning_rate": 5.548310521274705e-06, "loss": 0.7801, "step": 13620 }, { "epoch": 0.4811182883053923, "grad_norm": 1.6598297357559204, "learning_rate": 5.547741957644312e-06, "loss": 0.8049, "step": 13621 }, { "epoch": 0.48115361010910024, "grad_norm": 1.8544695377349854, "learning_rate": 5.547173386845175e-06, "loss": 0.8169, "step": 13622 }, { "epoch": 0.48118893191280815, "grad_norm": 1.8109712600708008, "learning_rate": 5.546604808884736e-06, "loss": 0.791, "step": 13623 }, { "epoch": 0.48122425371651606, "grad_norm": 1.757003664970398, "learning_rate": 5.546036223770435e-06, "loss": 0.8172, "step": 13624 }, { "epoch": 0.4812595755202239, "grad_norm": 1.827591896057129, "learning_rate": 5.545467631509715e-06, "loss": 0.836, "step": 13625 }, { "epoch": 0.4812948973239318, "grad_norm": 1.5173109769821167, "learning_rate": 5.544899032110017e-06, "loss": 0.7686, "step": 13626 }, { "epoch": 0.48133021912763974, "grad_norm": 1.5470072031021118, "learning_rate": 5.544330425578784e-06, "loss": 0.7717, "step": 13627 }, { "epoch": 0.48136554093134765, "grad_norm": 2.0207157135009766, "learning_rate": 5.543761811923456e-06, "loss": 0.8177, "step": 13628 }, { "epoch": 0.48140086273505556, "grad_norm": 1.5916612148284912, "learning_rate": 5.543193191151475e-06, "loss": 0.8336, "step": 13629 }, { "epoch": 0.48143618453876347, "grad_norm": 1.749630331993103, "learning_rate": 5.542624563270284e-06, "loss": 0.8209, "step": 13630 }, { "epoch": 0.4814715063424714, "grad_norm": 1.5127004384994507, "learning_rate": 5.5420559282873255e-06, "loss": 0.8091, "step": 13631 }, { "epoch": 0.4815068281461793, "grad_norm": 1.6327425241470337, "learning_rate": 5.541487286210039e-06, "loss": 0.8405, "step": 13632 }, { "epoch": 0.4815421499498872, "grad_norm": 2.0757720470428467, "learning_rate": 5.54091863704587e-06, "loss": 0.8031, "step": 13633 }, { "epoch": 0.4815774717535951, "grad_norm": 1.6999824047088623, "learning_rate": 5.54034998080226e-06, "loss": 0.8297, "step": 13634 }, { "epoch": 0.481612793557303, "grad_norm": 2.0694963932037354, "learning_rate": 5.539781317486651e-06, "loss": 0.8484, "step": 13635 }, { "epoch": 0.48164811536101093, "grad_norm": 1.658431053161621, "learning_rate": 5.539212647106486e-06, "loss": 0.8071, "step": 13636 }, { "epoch": 0.48168343716471884, "grad_norm": 1.6710481643676758, "learning_rate": 5.5386439696692055e-06, "loss": 0.8106, "step": 13637 }, { "epoch": 0.4817187589684267, "grad_norm": 1.5803364515304565, "learning_rate": 5.538075285182255e-06, "loss": 0.7981, "step": 13638 }, { "epoch": 0.4817540807721346, "grad_norm": 1.7202134132385254, "learning_rate": 5.537506593653077e-06, "loss": 0.8095, "step": 13639 }, { "epoch": 0.4817894025758425, "grad_norm": 0.9146881103515625, "learning_rate": 5.5369378950891115e-06, "loss": 0.5904, "step": 13640 }, { "epoch": 0.48182472437955043, "grad_norm": 1.7027651071548462, "learning_rate": 5.5363691894978055e-06, "loss": 0.8106, "step": 13641 }, { "epoch": 0.48186004618325834, "grad_norm": 1.7535995244979858, "learning_rate": 5.535800476886599e-06, "loss": 0.8115, "step": 13642 }, { "epoch": 0.48189536798696625, "grad_norm": 1.6077812910079956, "learning_rate": 5.535231757262939e-06, "loss": 0.7814, "step": 13643 }, { "epoch": 0.48193068979067416, "grad_norm": 1.6408742666244507, "learning_rate": 5.5346630306342655e-06, "loss": 0.8142, "step": 13644 }, { "epoch": 0.48196601159438207, "grad_norm": 1.667846918106079, "learning_rate": 5.5340942970080215e-06, "loss": 0.7806, "step": 13645 }, { "epoch": 0.48200133339809, "grad_norm": 1.581622838973999, "learning_rate": 5.533525556391653e-06, "loss": 0.7833, "step": 13646 }, { "epoch": 0.4820366552017979, "grad_norm": 1.447014570236206, "learning_rate": 5.532956808792601e-06, "loss": 0.7729, "step": 13647 }, { "epoch": 0.4820719770055058, "grad_norm": 1.79906165599823, "learning_rate": 5.5323880542183116e-06, "loss": 0.7709, "step": 13648 }, { "epoch": 0.4821072988092137, "grad_norm": 1.862823486328125, "learning_rate": 5.531819292676227e-06, "loss": 0.8354, "step": 13649 }, { "epoch": 0.4821426206129216, "grad_norm": 1.6283843517303467, "learning_rate": 5.531250524173791e-06, "loss": 0.7843, "step": 13650 }, { "epoch": 0.4821779424166295, "grad_norm": 1.5711874961853027, "learning_rate": 5.53068174871845e-06, "loss": 0.7982, "step": 13651 }, { "epoch": 0.4822132642203374, "grad_norm": 1.7168102264404297, "learning_rate": 5.530112966317645e-06, "loss": 0.8173, "step": 13652 }, { "epoch": 0.4822485860240453, "grad_norm": 1.9195289611816406, "learning_rate": 5.529544176978821e-06, "loss": 0.7784, "step": 13653 }, { "epoch": 0.4822839078277532, "grad_norm": 1.4962623119354248, "learning_rate": 5.528975380709421e-06, "loss": 0.7555, "step": 13654 }, { "epoch": 0.4823192296314611, "grad_norm": 1.6417500972747803, "learning_rate": 5.528406577516892e-06, "loss": 0.8001, "step": 13655 }, { "epoch": 0.48235455143516903, "grad_norm": 1.5104354619979858, "learning_rate": 5.527837767408677e-06, "loss": 0.7605, "step": 13656 }, { "epoch": 0.48238987323887694, "grad_norm": 1.7118827104568481, "learning_rate": 5.527268950392221e-06, "loss": 0.8242, "step": 13657 }, { "epoch": 0.48242519504258485, "grad_norm": 1.890342354774475, "learning_rate": 5.526700126474967e-06, "loss": 0.8019, "step": 13658 }, { "epoch": 0.48246051684629276, "grad_norm": 1.6919360160827637, "learning_rate": 5.526131295664361e-06, "loss": 0.8152, "step": 13659 }, { "epoch": 0.4824958386500007, "grad_norm": 1.773500919342041, "learning_rate": 5.5255624579678475e-06, "loss": 0.8163, "step": 13660 }, { "epoch": 0.4825311604537086, "grad_norm": 1.7401858568191528, "learning_rate": 5.524993613392871e-06, "loss": 0.8222, "step": 13661 }, { "epoch": 0.4825664822574165, "grad_norm": 1.6575393676757812, "learning_rate": 5.524424761946878e-06, "loss": 0.7958, "step": 13662 }, { "epoch": 0.4826018040611244, "grad_norm": 1.6503733396530151, "learning_rate": 5.5238559036373105e-06, "loss": 0.813, "step": 13663 }, { "epoch": 0.48263712586483226, "grad_norm": 1.7700364589691162, "learning_rate": 5.523287038471618e-06, "loss": 0.829, "step": 13664 }, { "epoch": 0.48267244766854017, "grad_norm": 1.8085533380508423, "learning_rate": 5.522718166457242e-06, "loss": 0.7962, "step": 13665 }, { "epoch": 0.4827077694722481, "grad_norm": 2.380913019180298, "learning_rate": 5.522149287601627e-06, "loss": 0.8054, "step": 13666 }, { "epoch": 0.482743091275956, "grad_norm": 1.6672688722610474, "learning_rate": 5.5215804019122225e-06, "loss": 0.8011, "step": 13667 }, { "epoch": 0.4827784130796639, "grad_norm": 2.0250911712646484, "learning_rate": 5.521011509396471e-06, "loss": 0.8005, "step": 13668 }, { "epoch": 0.4828137348833718, "grad_norm": 2.3002865314483643, "learning_rate": 5.520442610061818e-06, "loss": 0.7816, "step": 13669 }, { "epoch": 0.4828490566870797, "grad_norm": 1.717147707939148, "learning_rate": 5.5198737039157115e-06, "loss": 0.7621, "step": 13670 }, { "epoch": 0.48288437849078764, "grad_norm": 1.8642827272415161, "learning_rate": 5.519304790965595e-06, "loss": 0.7872, "step": 13671 }, { "epoch": 0.48291970029449555, "grad_norm": 1.754944086074829, "learning_rate": 5.518735871218915e-06, "loss": 0.7661, "step": 13672 }, { "epoch": 0.48295502209820346, "grad_norm": 1.7524784803390503, "learning_rate": 5.518166944683118e-06, "loss": 0.8052, "step": 13673 }, { "epoch": 0.48299034390191137, "grad_norm": 1.770317554473877, "learning_rate": 5.517598011365648e-06, "loss": 0.8182, "step": 13674 }, { "epoch": 0.4830256657056193, "grad_norm": 1.8690321445465088, "learning_rate": 5.517029071273954e-06, "loss": 0.7902, "step": 13675 }, { "epoch": 0.4830609875093272, "grad_norm": 1.8241695165634155, "learning_rate": 5.516460124415479e-06, "loss": 0.8122, "step": 13676 }, { "epoch": 0.48309630931303504, "grad_norm": 1.6101024150848389, "learning_rate": 5.515891170797673e-06, "loss": 0.8198, "step": 13677 }, { "epoch": 0.48313163111674295, "grad_norm": 1.8172330856323242, "learning_rate": 5.515322210427979e-06, "loss": 0.7972, "step": 13678 }, { "epoch": 0.48316695292045087, "grad_norm": 1.6547825336456299, "learning_rate": 5.514753243313845e-06, "loss": 0.7585, "step": 13679 }, { "epoch": 0.4832022747241588, "grad_norm": 1.7108852863311768, "learning_rate": 5.514184269462718e-06, "loss": 0.8365, "step": 13680 }, { "epoch": 0.4832375965278667, "grad_norm": 1.5999654531478882, "learning_rate": 5.513615288882044e-06, "loss": 0.7605, "step": 13681 }, { "epoch": 0.4832729183315746, "grad_norm": 3.1650779247283936, "learning_rate": 5.513046301579269e-06, "loss": 0.7789, "step": 13682 }, { "epoch": 0.4833082401352825, "grad_norm": 1.6942201852798462, "learning_rate": 5.512477307561841e-06, "loss": 0.7913, "step": 13683 }, { "epoch": 0.4833435619389904, "grad_norm": 1.6947119235992432, "learning_rate": 5.511908306837205e-06, "loss": 0.7824, "step": 13684 }, { "epoch": 0.48337888374269833, "grad_norm": 1.9402390718460083, "learning_rate": 5.51133929941281e-06, "loss": 0.7569, "step": 13685 }, { "epoch": 0.48341420554640624, "grad_norm": 1.9177725315093994, "learning_rate": 5.510770285296103e-06, "loss": 0.8409, "step": 13686 }, { "epoch": 0.48344952735011415, "grad_norm": 1.5959676504135132, "learning_rate": 5.510201264494529e-06, "loss": 0.7918, "step": 13687 }, { "epoch": 0.48348484915382206, "grad_norm": 1.7835075855255127, "learning_rate": 5.5096322370155385e-06, "loss": 0.7949, "step": 13688 }, { "epoch": 0.48352017095752997, "grad_norm": 1.6753648519515991, "learning_rate": 5.509063202866577e-06, "loss": 0.8153, "step": 13689 }, { "epoch": 0.4835554927612378, "grad_norm": 1.5854443311691284, "learning_rate": 5.5084941620550915e-06, "loss": 0.8012, "step": 13690 }, { "epoch": 0.48359081456494574, "grad_norm": 1.5732101202011108, "learning_rate": 5.50792511458853e-06, "loss": 0.7956, "step": 13691 }, { "epoch": 0.48362613636865365, "grad_norm": 1.7053624391555786, "learning_rate": 5.5073560604743396e-06, "loss": 0.8114, "step": 13692 }, { "epoch": 0.48366145817236156, "grad_norm": 1.800463080406189, "learning_rate": 5.506786999719968e-06, "loss": 0.8237, "step": 13693 }, { "epoch": 0.48369677997606947, "grad_norm": 1.6055316925048828, "learning_rate": 5.506217932332865e-06, "loss": 0.8136, "step": 13694 }, { "epoch": 0.4837321017797774, "grad_norm": 2.1234071254730225, "learning_rate": 5.505648858320476e-06, "loss": 0.7639, "step": 13695 }, { "epoch": 0.4837674235834853, "grad_norm": 1.7771748304367065, "learning_rate": 5.5050797776902495e-06, "loss": 0.8264, "step": 13696 }, { "epoch": 0.4838027453871932, "grad_norm": 1.9681506156921387, "learning_rate": 5.504510690449635e-06, "loss": 0.8046, "step": 13697 }, { "epoch": 0.4838380671909011, "grad_norm": 1.6317766904830933, "learning_rate": 5.503941596606077e-06, "loss": 0.7947, "step": 13698 }, { "epoch": 0.483873388994609, "grad_norm": 1.6007707118988037, "learning_rate": 5.50337249616703e-06, "loss": 0.7575, "step": 13699 }, { "epoch": 0.48390871079831693, "grad_norm": 1.5663224458694458, "learning_rate": 5.5028033891399345e-06, "loss": 0.7778, "step": 13700 }, { "epoch": 0.48394403260202484, "grad_norm": 1.6343070268630981, "learning_rate": 5.502234275532245e-06, "loss": 0.7703, "step": 13701 }, { "epoch": 0.48397935440573275, "grad_norm": 1.6489229202270508, "learning_rate": 5.501665155351408e-06, "loss": 0.7901, "step": 13702 }, { "epoch": 0.4840146762094406, "grad_norm": 2.065966844558716, "learning_rate": 5.50109602860487e-06, "loss": 0.8192, "step": 13703 }, { "epoch": 0.4840499980131485, "grad_norm": 1.7472846508026123, "learning_rate": 5.5005268953000825e-06, "loss": 0.8335, "step": 13704 }, { "epoch": 0.48408531981685643, "grad_norm": 1.6995549201965332, "learning_rate": 5.499957755444492e-06, "loss": 0.7899, "step": 13705 }, { "epoch": 0.48412064162056434, "grad_norm": 1.6398309469223022, "learning_rate": 5.499388609045549e-06, "loss": 0.8215, "step": 13706 }, { "epoch": 0.48415596342427225, "grad_norm": 1.7597358226776123, "learning_rate": 5.498819456110702e-06, "loss": 0.7848, "step": 13707 }, { "epoch": 0.48419128522798016, "grad_norm": 1.5432287454605103, "learning_rate": 5.4982502966474005e-06, "loss": 0.8347, "step": 13708 }, { "epoch": 0.48422660703168807, "grad_norm": 1.6483373641967773, "learning_rate": 5.497681130663093e-06, "loss": 0.7905, "step": 13709 }, { "epoch": 0.484261928835396, "grad_norm": 1.6913834810256958, "learning_rate": 5.4971119581652255e-06, "loss": 0.8154, "step": 13710 }, { "epoch": 0.4842972506391039, "grad_norm": 1.8551630973815918, "learning_rate": 5.496542779161252e-06, "loss": 0.7728, "step": 13711 }, { "epoch": 0.4843325724428118, "grad_norm": 1.8474854230880737, "learning_rate": 5.495973593658621e-06, "loss": 0.7952, "step": 13712 }, { "epoch": 0.4843678942465197, "grad_norm": 1.6020281314849854, "learning_rate": 5.495404401664779e-06, "loss": 0.7802, "step": 13713 }, { "epoch": 0.4844032160502276, "grad_norm": 1.6784297227859497, "learning_rate": 5.494835203187178e-06, "loss": 0.7762, "step": 13714 }, { "epoch": 0.48443853785393554, "grad_norm": 1.6203182935714722, "learning_rate": 5.494265998233268e-06, "loss": 0.8189, "step": 13715 }, { "epoch": 0.4844738596576434, "grad_norm": 1.8353221416473389, "learning_rate": 5.493696786810495e-06, "loss": 0.7832, "step": 13716 }, { "epoch": 0.4845091814613513, "grad_norm": 1.7740182876586914, "learning_rate": 5.493127568926313e-06, "loss": 0.7828, "step": 13717 }, { "epoch": 0.4845445032650592, "grad_norm": 1.7873073816299438, "learning_rate": 5.492558344588171e-06, "loss": 0.7888, "step": 13718 }, { "epoch": 0.4845798250687671, "grad_norm": 1.6029797792434692, "learning_rate": 5.4919891138035165e-06, "loss": 0.7575, "step": 13719 }, { "epoch": 0.48461514687247503, "grad_norm": 1.4477570056915283, "learning_rate": 5.491419876579802e-06, "loss": 0.8007, "step": 13720 }, { "epoch": 0.48465046867618294, "grad_norm": 1.6688727140426636, "learning_rate": 5.4908506329244745e-06, "loss": 0.8078, "step": 13721 }, { "epoch": 0.48468579047989085, "grad_norm": 1.5813212394714355, "learning_rate": 5.490281382844988e-06, "loss": 0.7933, "step": 13722 }, { "epoch": 0.48472111228359877, "grad_norm": 1.6501494646072388, "learning_rate": 5.489712126348791e-06, "loss": 0.7722, "step": 13723 }, { "epoch": 0.4847564340873067, "grad_norm": 2.0421745777130127, "learning_rate": 5.489142863443332e-06, "loss": 0.8439, "step": 13724 }, { "epoch": 0.4847917558910146, "grad_norm": 1.9144822359085083, "learning_rate": 5.488573594136065e-06, "loss": 0.7802, "step": 13725 }, { "epoch": 0.4848270776947225, "grad_norm": 1.8676286935806274, "learning_rate": 5.488004318434438e-06, "loss": 0.8322, "step": 13726 }, { "epoch": 0.4848623994984304, "grad_norm": 0.9749801754951477, "learning_rate": 5.487435036345903e-06, "loss": 0.5903, "step": 13727 }, { "epoch": 0.4848977213021383, "grad_norm": 1.775667667388916, "learning_rate": 5.486865747877909e-06, "loss": 0.8177, "step": 13728 }, { "epoch": 0.4849330431058462, "grad_norm": 1.6630502939224243, "learning_rate": 5.486296453037907e-06, "loss": 0.8243, "step": 13729 }, { "epoch": 0.4849683649095541, "grad_norm": 1.8660650253295898, "learning_rate": 5.48572715183335e-06, "loss": 0.7931, "step": 13730 }, { "epoch": 0.485003686713262, "grad_norm": 1.6131458282470703, "learning_rate": 5.485157844271685e-06, "loss": 0.7878, "step": 13731 }, { "epoch": 0.4850390085169699, "grad_norm": 1.626452922821045, "learning_rate": 5.484588530360367e-06, "loss": 0.7707, "step": 13732 }, { "epoch": 0.4850743303206778, "grad_norm": 1.5636420249938965, "learning_rate": 5.484019210106846e-06, "loss": 0.783, "step": 13733 }, { "epoch": 0.4851096521243857, "grad_norm": 1.6892083883285522, "learning_rate": 5.48344988351857e-06, "loss": 0.7893, "step": 13734 }, { "epoch": 0.48514497392809364, "grad_norm": 1.774717926979065, "learning_rate": 5.4828805506029956e-06, "loss": 0.7927, "step": 13735 }, { "epoch": 0.48518029573180155, "grad_norm": 0.9991561770439148, "learning_rate": 5.482311211367572e-06, "loss": 0.6143, "step": 13736 }, { "epoch": 0.48521561753550946, "grad_norm": 0.9124531149864197, "learning_rate": 5.481741865819748e-06, "loss": 0.5945, "step": 13737 }, { "epoch": 0.48525093933921737, "grad_norm": 1.5530624389648438, "learning_rate": 5.481172513966977e-06, "loss": 0.7585, "step": 13738 }, { "epoch": 0.4852862611429253, "grad_norm": 1.5865397453308105, "learning_rate": 5.480603155816709e-06, "loss": 0.82, "step": 13739 }, { "epoch": 0.4853215829466332, "grad_norm": 1.9039479494094849, "learning_rate": 5.4800337913764e-06, "loss": 0.8137, "step": 13740 }, { "epoch": 0.4853569047503411, "grad_norm": 1.6209418773651123, "learning_rate": 5.4794644206534985e-06, "loss": 0.764, "step": 13741 }, { "epoch": 0.48539222655404896, "grad_norm": 1.5146559476852417, "learning_rate": 5.478895043655455e-06, "loss": 0.7892, "step": 13742 }, { "epoch": 0.48542754835775687, "grad_norm": 1.596990704536438, "learning_rate": 5.478325660389725e-06, "loss": 0.7994, "step": 13743 }, { "epoch": 0.4854628701614648, "grad_norm": 1.7771742343902588, "learning_rate": 5.477756270863759e-06, "loss": 0.8044, "step": 13744 }, { "epoch": 0.4854981919651727, "grad_norm": 2.007746934890747, "learning_rate": 5.477186875085008e-06, "loss": 0.7867, "step": 13745 }, { "epoch": 0.4855335137688806, "grad_norm": 1.715051293373108, "learning_rate": 5.476617473060926e-06, "loss": 0.7764, "step": 13746 }, { "epoch": 0.4855688355725885, "grad_norm": 1.69217848777771, "learning_rate": 5.476048064798961e-06, "loss": 0.7963, "step": 13747 }, { "epoch": 0.4856041573762964, "grad_norm": 1.9717388153076172, "learning_rate": 5.475478650306571e-06, "loss": 0.7818, "step": 13748 }, { "epoch": 0.48563947918000433, "grad_norm": 1.7119344472885132, "learning_rate": 5.4749092295912055e-06, "loss": 0.8242, "step": 13749 }, { "epoch": 0.48567480098371224, "grad_norm": 1.5595612525939941, "learning_rate": 5.474339802660316e-06, "loss": 0.7743, "step": 13750 }, { "epoch": 0.48571012278742015, "grad_norm": 1.7761927843093872, "learning_rate": 5.473770369521358e-06, "loss": 0.8179, "step": 13751 }, { "epoch": 0.48574544459112806, "grad_norm": 1.587024211883545, "learning_rate": 5.473200930181781e-06, "loss": 0.7774, "step": 13752 }, { "epoch": 0.48578076639483597, "grad_norm": 1.7169190645217896, "learning_rate": 5.472631484649039e-06, "loss": 0.8029, "step": 13753 }, { "epoch": 0.4858160881985439, "grad_norm": 1.8266632556915283, "learning_rate": 5.472062032930586e-06, "loss": 0.8176, "step": 13754 }, { "epoch": 0.48585141000225174, "grad_norm": 1.941514492034912, "learning_rate": 5.4714925750338744e-06, "loss": 0.8082, "step": 13755 }, { "epoch": 0.48588673180595965, "grad_norm": 1.885501503944397, "learning_rate": 5.470923110966356e-06, "loss": 0.8135, "step": 13756 }, { "epoch": 0.48592205360966756, "grad_norm": 1.814007043838501, "learning_rate": 5.470353640735485e-06, "loss": 0.7933, "step": 13757 }, { "epoch": 0.48595737541337547, "grad_norm": 1.7008168697357178, "learning_rate": 5.469784164348713e-06, "loss": 0.806, "step": 13758 }, { "epoch": 0.4859926972170834, "grad_norm": 3.9594533443450928, "learning_rate": 5.469214681813495e-06, "loss": 0.7732, "step": 13759 }, { "epoch": 0.4860280190207913, "grad_norm": 1.7142479419708252, "learning_rate": 5.468645193137283e-06, "loss": 0.7898, "step": 13760 }, { "epoch": 0.4860633408244992, "grad_norm": 1.9103196859359741, "learning_rate": 5.46807569832753e-06, "loss": 0.7735, "step": 13761 }, { "epoch": 0.4860986626282071, "grad_norm": 2.039621114730835, "learning_rate": 5.467506197391691e-06, "loss": 0.7798, "step": 13762 }, { "epoch": 0.486133984431915, "grad_norm": 2.1803925037384033, "learning_rate": 5.466936690337218e-06, "loss": 0.8316, "step": 13763 }, { "epoch": 0.48616930623562293, "grad_norm": 1.7918412685394287, "learning_rate": 5.466367177171568e-06, "loss": 0.8112, "step": 13764 }, { "epoch": 0.48620462803933084, "grad_norm": 1.7959272861480713, "learning_rate": 5.46579765790219e-06, "loss": 0.7642, "step": 13765 }, { "epoch": 0.48623994984303875, "grad_norm": 1.9026646614074707, "learning_rate": 5.46522813253654e-06, "loss": 0.8008, "step": 13766 }, { "epoch": 0.48627527164674667, "grad_norm": 1.7053371667861938, "learning_rate": 5.464658601082071e-06, "loss": 0.8065, "step": 13767 }, { "epoch": 0.4863105934504545, "grad_norm": 1.7798593044281006, "learning_rate": 5.464089063546238e-06, "loss": 0.8052, "step": 13768 }, { "epoch": 0.48634591525416243, "grad_norm": 2.4453494548797607, "learning_rate": 5.463519519936495e-06, "loss": 0.7626, "step": 13769 }, { "epoch": 0.48638123705787034, "grad_norm": 1.685605525970459, "learning_rate": 5.462949970260296e-06, "loss": 0.8144, "step": 13770 }, { "epoch": 0.48641655886157825, "grad_norm": 1.6905838251113892, "learning_rate": 5.462380414525093e-06, "loss": 0.7905, "step": 13771 }, { "epoch": 0.48645188066528616, "grad_norm": 1.6609193086624146, "learning_rate": 5.461810852738344e-06, "loss": 0.7927, "step": 13772 }, { "epoch": 0.4864872024689941, "grad_norm": 1.903464913368225, "learning_rate": 5.4612412849075015e-06, "loss": 0.8039, "step": 13773 }, { "epoch": 0.486522524272702, "grad_norm": 1.6926568746566772, "learning_rate": 5.4606717110400186e-06, "loss": 0.7788, "step": 13774 }, { "epoch": 0.4865578460764099, "grad_norm": 1.5276470184326172, "learning_rate": 5.4601021311433525e-06, "loss": 0.7533, "step": 13775 }, { "epoch": 0.4865931678801178, "grad_norm": 1.7501804828643799, "learning_rate": 5.459532545224954e-06, "loss": 0.8152, "step": 13776 }, { "epoch": 0.4866284896838257, "grad_norm": 1.900071382522583, "learning_rate": 5.458962953292281e-06, "loss": 0.7973, "step": 13777 }, { "epoch": 0.4866638114875336, "grad_norm": 1.703660488128662, "learning_rate": 5.4583933553527865e-06, "loss": 0.8093, "step": 13778 }, { "epoch": 0.48669913329124154, "grad_norm": 1.6553585529327393, "learning_rate": 5.457823751413926e-06, "loss": 0.8176, "step": 13779 }, { "epoch": 0.48673445509494945, "grad_norm": 1.6454787254333496, "learning_rate": 5.457254141483155e-06, "loss": 0.7717, "step": 13780 }, { "epoch": 0.4867697768986573, "grad_norm": 1.771087884902954, "learning_rate": 5.456684525567928e-06, "loss": 0.8114, "step": 13781 }, { "epoch": 0.4868050987023652, "grad_norm": 1.5756725072860718, "learning_rate": 5.4561149036757e-06, "loss": 0.8026, "step": 13782 }, { "epoch": 0.4868404205060731, "grad_norm": 1.6145093441009521, "learning_rate": 5.455545275813925e-06, "loss": 0.8245, "step": 13783 }, { "epoch": 0.48687574230978103, "grad_norm": 1.67228102684021, "learning_rate": 5.454975641990059e-06, "loss": 0.8247, "step": 13784 }, { "epoch": 0.48691106411348895, "grad_norm": 1.7233318090438843, "learning_rate": 5.454406002211558e-06, "loss": 0.8027, "step": 13785 }, { "epoch": 0.48694638591719686, "grad_norm": 1.742656946182251, "learning_rate": 5.453836356485876e-06, "loss": 0.8204, "step": 13786 }, { "epoch": 0.48698170772090477, "grad_norm": 1.4257084131240845, "learning_rate": 5.4532667048204694e-06, "loss": 0.7479, "step": 13787 }, { "epoch": 0.4870170295246127, "grad_norm": 1.6292065382003784, "learning_rate": 5.452697047222794e-06, "loss": 0.8258, "step": 13788 }, { "epoch": 0.4870523513283206, "grad_norm": 1.5971096754074097, "learning_rate": 5.452127383700304e-06, "loss": 0.7798, "step": 13789 }, { "epoch": 0.4870876731320285, "grad_norm": 1.5960166454315186, "learning_rate": 5.451557714260455e-06, "loss": 0.8046, "step": 13790 }, { "epoch": 0.4871229949357364, "grad_norm": 2.3877480030059814, "learning_rate": 5.4509880389107036e-06, "loss": 0.7908, "step": 13791 }, { "epoch": 0.4871583167394443, "grad_norm": 1.8057548999786377, "learning_rate": 5.450418357658508e-06, "loss": 0.794, "step": 13792 }, { "epoch": 0.48719363854315223, "grad_norm": 1.5810929536819458, "learning_rate": 5.4498486705113195e-06, "loss": 0.7718, "step": 13793 }, { "epoch": 0.4872289603468601, "grad_norm": 1.625157356262207, "learning_rate": 5.4492789774765974e-06, "loss": 0.7727, "step": 13794 }, { "epoch": 0.487264282150568, "grad_norm": 1.6909220218658447, "learning_rate": 5.448709278561795e-06, "loss": 0.8363, "step": 13795 }, { "epoch": 0.4872996039542759, "grad_norm": 1.6756961345672607, "learning_rate": 5.44813957377437e-06, "loss": 0.7977, "step": 13796 }, { "epoch": 0.4873349257579838, "grad_norm": 1.6229974031448364, "learning_rate": 5.4475698631217786e-06, "loss": 0.8119, "step": 13797 }, { "epoch": 0.4873702475616917, "grad_norm": 1.8668501377105713, "learning_rate": 5.447000146611477e-06, "loss": 0.7953, "step": 13798 }, { "epoch": 0.48740556936539964, "grad_norm": 1.6398792266845703, "learning_rate": 5.446430424250924e-06, "loss": 0.7992, "step": 13799 }, { "epoch": 0.48744089116910755, "grad_norm": 2.119666814804077, "learning_rate": 5.44586069604757e-06, "loss": 0.786, "step": 13800 }, { "epoch": 0.48747621297281546, "grad_norm": 1.675326943397522, "learning_rate": 5.445290962008878e-06, "loss": 0.7947, "step": 13801 }, { "epoch": 0.48751153477652337, "grad_norm": 1.824059247970581, "learning_rate": 5.4447212221423e-06, "loss": 0.8087, "step": 13802 }, { "epoch": 0.4875468565802313, "grad_norm": 1.6544547080993652, "learning_rate": 5.444151476455294e-06, "loss": 0.8056, "step": 13803 }, { "epoch": 0.4875821783839392, "grad_norm": 1.657720685005188, "learning_rate": 5.443581724955317e-06, "loss": 0.7781, "step": 13804 }, { "epoch": 0.4876175001876471, "grad_norm": 1.606471300125122, "learning_rate": 5.443011967649826e-06, "loss": 0.7978, "step": 13805 }, { "epoch": 0.487652821991355, "grad_norm": 1.8850575685501099, "learning_rate": 5.442442204546278e-06, "loss": 0.7872, "step": 13806 }, { "epoch": 0.48768814379506287, "grad_norm": 1.7528417110443115, "learning_rate": 5.441872435652129e-06, "loss": 0.8148, "step": 13807 }, { "epoch": 0.4877234655987708, "grad_norm": 1.6607736349105835, "learning_rate": 5.4413026609748365e-06, "loss": 0.816, "step": 13808 }, { "epoch": 0.4877587874024787, "grad_norm": 1.756662368774414, "learning_rate": 5.440732880521859e-06, "loss": 0.8165, "step": 13809 }, { "epoch": 0.4877941092061866, "grad_norm": 1.9659584760665894, "learning_rate": 5.440163094300651e-06, "loss": 0.8422, "step": 13810 }, { "epoch": 0.4878294310098945, "grad_norm": 1.695682406425476, "learning_rate": 5.439593302318672e-06, "loss": 0.807, "step": 13811 }, { "epoch": 0.4878647528136024, "grad_norm": 1.72123384475708, "learning_rate": 5.439023504583378e-06, "loss": 0.7935, "step": 13812 }, { "epoch": 0.48790007461731033, "grad_norm": 1.7071762084960938, "learning_rate": 5.438453701102228e-06, "loss": 0.7738, "step": 13813 }, { "epoch": 0.48793539642101824, "grad_norm": 1.6377694606781006, "learning_rate": 5.437883891882677e-06, "loss": 0.7736, "step": 13814 }, { "epoch": 0.48797071822472615, "grad_norm": 1.8636884689331055, "learning_rate": 5.4373140769321854e-06, "loss": 0.7967, "step": 13815 }, { "epoch": 0.48800604002843406, "grad_norm": 1.9059375524520874, "learning_rate": 5.436744256258207e-06, "loss": 0.8141, "step": 13816 }, { "epoch": 0.488041361832142, "grad_norm": 2.022559642791748, "learning_rate": 5.436174429868204e-06, "loss": 0.8136, "step": 13817 }, { "epoch": 0.4880766836358499, "grad_norm": 1.8074287176132202, "learning_rate": 5.435604597769632e-06, "loss": 0.8305, "step": 13818 }, { "epoch": 0.4881120054395578, "grad_norm": 2.0065574645996094, "learning_rate": 5.4350347599699485e-06, "loss": 0.7862, "step": 13819 }, { "epoch": 0.48814732724326565, "grad_norm": 1.8042633533477783, "learning_rate": 5.4344649164766115e-06, "loss": 0.8239, "step": 13820 }, { "epoch": 0.48818264904697356, "grad_norm": 1.713045358657837, "learning_rate": 5.433895067297078e-06, "loss": 0.8299, "step": 13821 }, { "epoch": 0.48821797085068147, "grad_norm": 1.8856106996536255, "learning_rate": 5.433325212438809e-06, "loss": 0.8133, "step": 13822 }, { "epoch": 0.4882532926543894, "grad_norm": 2.2285895347595215, "learning_rate": 5.432755351909262e-06, "loss": 0.7815, "step": 13823 }, { "epoch": 0.4882886144580973, "grad_norm": 1.6142661571502686, "learning_rate": 5.432185485715893e-06, "loss": 0.7856, "step": 13824 }, { "epoch": 0.4883239362618052, "grad_norm": 1.7516746520996094, "learning_rate": 5.431615613866161e-06, "loss": 0.786, "step": 13825 }, { "epoch": 0.4883592580655131, "grad_norm": 1.6843407154083252, "learning_rate": 5.431045736367525e-06, "loss": 0.7996, "step": 13826 }, { "epoch": 0.488394579869221, "grad_norm": 1.8655885457992554, "learning_rate": 5.430475853227445e-06, "loss": 0.783, "step": 13827 }, { "epoch": 0.48842990167292893, "grad_norm": 1.6649433374404907, "learning_rate": 5.429905964453379e-06, "loss": 0.7885, "step": 13828 }, { "epoch": 0.48846522347663685, "grad_norm": 1.6147541999816895, "learning_rate": 5.429336070052782e-06, "loss": 0.8122, "step": 13829 }, { "epoch": 0.48850054528034476, "grad_norm": 1.5442640781402588, "learning_rate": 5.428766170033116e-06, "loss": 0.7886, "step": 13830 }, { "epoch": 0.48853586708405267, "grad_norm": 1.7718192338943481, "learning_rate": 5.428196264401838e-06, "loss": 0.8496, "step": 13831 }, { "epoch": 0.4885711888877606, "grad_norm": 1.630871057510376, "learning_rate": 5.4276263531664095e-06, "loss": 0.8039, "step": 13832 }, { "epoch": 0.4886065106914685, "grad_norm": 1.8532885313034058, "learning_rate": 5.427056436334287e-06, "loss": 0.808, "step": 13833 }, { "epoch": 0.48864183249517634, "grad_norm": 1.8330397605895996, "learning_rate": 5.42648651391293e-06, "loss": 0.7806, "step": 13834 }, { "epoch": 0.48867715429888425, "grad_norm": 2.362581968307495, "learning_rate": 5.425916585909798e-06, "loss": 0.8329, "step": 13835 }, { "epoch": 0.48871247610259216, "grad_norm": 1.686530351638794, "learning_rate": 5.425346652332349e-06, "loss": 0.7952, "step": 13836 }, { "epoch": 0.4887477979063001, "grad_norm": 1.570077657699585, "learning_rate": 5.424776713188043e-06, "loss": 0.7638, "step": 13837 }, { "epoch": 0.488783119710008, "grad_norm": 1.6464475393295288, "learning_rate": 5.4242067684843416e-06, "loss": 0.8135, "step": 13838 }, { "epoch": 0.4888184415137159, "grad_norm": 1.8309454917907715, "learning_rate": 5.423636818228698e-06, "loss": 0.8098, "step": 13839 }, { "epoch": 0.4888537633174238, "grad_norm": 1.676884412765503, "learning_rate": 5.4230668624285776e-06, "loss": 0.7997, "step": 13840 }, { "epoch": 0.4888890851211317, "grad_norm": 1.7728773355484009, "learning_rate": 5.422496901091437e-06, "loss": 0.7764, "step": 13841 }, { "epoch": 0.4889244069248396, "grad_norm": 1.8326877355575562, "learning_rate": 5.4219269342247375e-06, "loss": 0.8049, "step": 13842 }, { "epoch": 0.48895972872854754, "grad_norm": 1.6682159900665283, "learning_rate": 5.421356961835936e-06, "loss": 0.7858, "step": 13843 }, { "epoch": 0.48899505053225545, "grad_norm": 2.276639461517334, "learning_rate": 5.420786983932496e-06, "loss": 0.8173, "step": 13844 }, { "epoch": 0.48903037233596336, "grad_norm": 1.792803406715393, "learning_rate": 5.420217000521872e-06, "loss": 0.8272, "step": 13845 }, { "epoch": 0.48906569413967127, "grad_norm": 1.7206714153289795, "learning_rate": 5.419647011611528e-06, "loss": 0.8428, "step": 13846 }, { "epoch": 0.4891010159433791, "grad_norm": 1.8543907403945923, "learning_rate": 5.419077017208924e-06, "loss": 0.7704, "step": 13847 }, { "epoch": 0.48913633774708704, "grad_norm": 1.7533676624298096, "learning_rate": 5.418507017321518e-06, "loss": 0.8128, "step": 13848 }, { "epoch": 0.48917165955079495, "grad_norm": 1.5752437114715576, "learning_rate": 5.417937011956772e-06, "loss": 0.8078, "step": 13849 }, { "epoch": 0.48920698135450286, "grad_norm": 1.5355242490768433, "learning_rate": 5.417367001122142e-06, "loss": 0.7698, "step": 13850 }, { "epoch": 0.48924230315821077, "grad_norm": 1.6839226484298706, "learning_rate": 5.416796984825094e-06, "loss": 0.806, "step": 13851 }, { "epoch": 0.4892776249619187, "grad_norm": 1.80464768409729, "learning_rate": 5.416226963073085e-06, "loss": 0.8076, "step": 13852 }, { "epoch": 0.4893129467656266, "grad_norm": 1.5301342010498047, "learning_rate": 5.4156569358735746e-06, "loss": 0.7939, "step": 13853 }, { "epoch": 0.4893482685693345, "grad_norm": 1.6140379905700684, "learning_rate": 5.415086903234025e-06, "loss": 0.8221, "step": 13854 }, { "epoch": 0.4893835903730424, "grad_norm": 0.9292678236961365, "learning_rate": 5.414516865161895e-06, "loss": 0.566, "step": 13855 }, { "epoch": 0.4894189121767503, "grad_norm": 1.6536164283752441, "learning_rate": 5.413946821664648e-06, "loss": 0.8013, "step": 13856 }, { "epoch": 0.48945423398045823, "grad_norm": 1.5286240577697754, "learning_rate": 5.4133767727497425e-06, "loss": 0.8418, "step": 13857 }, { "epoch": 0.48948955578416614, "grad_norm": 1.6593009233474731, "learning_rate": 5.412806718424638e-06, "loss": 0.7833, "step": 13858 }, { "epoch": 0.48952487758787405, "grad_norm": 1.7933998107910156, "learning_rate": 5.4122366586967975e-06, "loss": 0.794, "step": 13859 }, { "epoch": 0.4895601993915819, "grad_norm": 1.8485294580459595, "learning_rate": 5.4116665935736815e-06, "loss": 0.8032, "step": 13860 }, { "epoch": 0.4895955211952898, "grad_norm": 1.5238908529281616, "learning_rate": 5.41109652306275e-06, "loss": 0.7475, "step": 13861 }, { "epoch": 0.48963084299899773, "grad_norm": 1.6388670206069946, "learning_rate": 5.410526447171466e-06, "loss": 0.7849, "step": 13862 }, { "epoch": 0.48966616480270564, "grad_norm": 1.6534759998321533, "learning_rate": 5.4099563659072865e-06, "loss": 0.7951, "step": 13863 }, { "epoch": 0.48970148660641355, "grad_norm": 2.4668562412261963, "learning_rate": 5.409386279277676e-06, "loss": 0.7977, "step": 13864 }, { "epoch": 0.48973680841012146, "grad_norm": 1.739075779914856, "learning_rate": 5.408816187290097e-06, "loss": 0.8301, "step": 13865 }, { "epoch": 0.48977213021382937, "grad_norm": 1.823828101158142, "learning_rate": 5.4082460899520065e-06, "loss": 0.7789, "step": 13866 }, { "epoch": 0.4898074520175373, "grad_norm": 1.5846867561340332, "learning_rate": 5.407675987270869e-06, "loss": 0.7935, "step": 13867 }, { "epoch": 0.4898427738212452, "grad_norm": 1.6384286880493164, "learning_rate": 5.407105879254143e-06, "loss": 0.816, "step": 13868 }, { "epoch": 0.4898780956249531, "grad_norm": 1.6543669700622559, "learning_rate": 5.406535765909293e-06, "loss": 0.7724, "step": 13869 }, { "epoch": 0.489913417428661, "grad_norm": 1.5691521167755127, "learning_rate": 5.4059656472437795e-06, "loss": 0.7858, "step": 13870 }, { "epoch": 0.4899487392323689, "grad_norm": 4.61802864074707, "learning_rate": 5.405395523265064e-06, "loss": 0.8068, "step": 13871 }, { "epoch": 0.48998406103607683, "grad_norm": 1.598499059677124, "learning_rate": 5.404825393980607e-06, "loss": 0.7982, "step": 13872 }, { "epoch": 0.4900193828397847, "grad_norm": 1.579687476158142, "learning_rate": 5.404255259397872e-06, "loss": 0.7916, "step": 13873 }, { "epoch": 0.4900547046434926, "grad_norm": 1.7633435726165771, "learning_rate": 5.40368511952432e-06, "loss": 0.788, "step": 13874 }, { "epoch": 0.4900900264472005, "grad_norm": 1.6647471189498901, "learning_rate": 5.403114974367415e-06, "loss": 0.7957, "step": 13875 }, { "epoch": 0.4901253482509084, "grad_norm": 1.5304679870605469, "learning_rate": 5.4025448239346145e-06, "loss": 0.7685, "step": 13876 }, { "epoch": 0.49016067005461633, "grad_norm": 1.6437866687774658, "learning_rate": 5.401974668233384e-06, "loss": 0.8046, "step": 13877 }, { "epoch": 0.49019599185832424, "grad_norm": 1.4484424591064453, "learning_rate": 5.401404507271184e-06, "loss": 0.7666, "step": 13878 }, { "epoch": 0.49023131366203215, "grad_norm": 1.5998737812042236, "learning_rate": 5.400834341055477e-06, "loss": 0.8248, "step": 13879 }, { "epoch": 0.49026663546574006, "grad_norm": 1.7817336320877075, "learning_rate": 5.400264169593727e-06, "loss": 0.8189, "step": 13880 }, { "epoch": 0.490301957269448, "grad_norm": 1.5511146783828735, "learning_rate": 5.399693992893395e-06, "loss": 0.7948, "step": 13881 }, { "epoch": 0.4903372790731559, "grad_norm": 1.6604441404342651, "learning_rate": 5.39912381096194e-06, "loss": 0.7806, "step": 13882 }, { "epoch": 0.4903726008768638, "grad_norm": 3.3563573360443115, "learning_rate": 5.3985536238068305e-06, "loss": 0.7813, "step": 13883 }, { "epoch": 0.4904079226805717, "grad_norm": 1.7127622365951538, "learning_rate": 5.397983431435525e-06, "loss": 0.7896, "step": 13884 }, { "epoch": 0.4904432444842796, "grad_norm": 1.8496220111846924, "learning_rate": 5.397413233855488e-06, "loss": 0.8373, "step": 13885 }, { "epoch": 0.49047856628798747, "grad_norm": 1.8960353136062622, "learning_rate": 5.396843031074181e-06, "loss": 0.7797, "step": 13886 }, { "epoch": 0.4905138880916954, "grad_norm": 1.8866714239120483, "learning_rate": 5.396272823099066e-06, "loss": 0.8012, "step": 13887 }, { "epoch": 0.4905492098954033, "grad_norm": 1.6523396968841553, "learning_rate": 5.395702609937607e-06, "loss": 0.7629, "step": 13888 }, { "epoch": 0.4905845316991112, "grad_norm": 1.7930876016616821, "learning_rate": 5.395132391597267e-06, "loss": 0.79, "step": 13889 }, { "epoch": 0.4906198535028191, "grad_norm": 1.6845035552978516, "learning_rate": 5.39456216808551e-06, "loss": 0.7994, "step": 13890 }, { "epoch": 0.490655175306527, "grad_norm": 1.9251140356063843, "learning_rate": 5.393991939409796e-06, "loss": 0.7734, "step": 13891 }, { "epoch": 0.49069049711023494, "grad_norm": 1.7903079986572266, "learning_rate": 5.3934217055775886e-06, "loss": 0.7743, "step": 13892 }, { "epoch": 0.49072581891394285, "grad_norm": 1.675809621810913, "learning_rate": 5.392851466596355e-06, "loss": 0.7825, "step": 13893 }, { "epoch": 0.49076114071765076, "grad_norm": 1.6617345809936523, "learning_rate": 5.392281222473552e-06, "loss": 0.8072, "step": 13894 }, { "epoch": 0.49079646252135867, "grad_norm": 1.5923395156860352, "learning_rate": 5.3917109732166484e-06, "loss": 0.8052, "step": 13895 }, { "epoch": 0.4908317843250666, "grad_norm": 1.7130084037780762, "learning_rate": 5.391140718833105e-06, "loss": 0.7931, "step": 13896 }, { "epoch": 0.4908671061287745, "grad_norm": 1.732373595237732, "learning_rate": 5.390570459330383e-06, "loss": 0.7861, "step": 13897 }, { "epoch": 0.4909024279324824, "grad_norm": 1.5810356140136719, "learning_rate": 5.39000019471595e-06, "loss": 0.8353, "step": 13898 }, { "epoch": 0.49093774973619025, "grad_norm": 3.1234235763549805, "learning_rate": 5.389429924997268e-06, "loss": 0.7766, "step": 13899 }, { "epoch": 0.49097307153989816, "grad_norm": 2.4294934272766113, "learning_rate": 5.388859650181799e-06, "loss": 0.7752, "step": 13900 }, { "epoch": 0.4910083933436061, "grad_norm": 1.731345534324646, "learning_rate": 5.388289370277009e-06, "loss": 0.7889, "step": 13901 }, { "epoch": 0.491043715147314, "grad_norm": 1.7148628234863281, "learning_rate": 5.387719085290362e-06, "loss": 0.8034, "step": 13902 }, { "epoch": 0.4910790369510219, "grad_norm": 2.0382237434387207, "learning_rate": 5.387148795229319e-06, "loss": 0.7727, "step": 13903 }, { "epoch": 0.4911143587547298, "grad_norm": 1.7223608493804932, "learning_rate": 5.386578500101346e-06, "loss": 0.7767, "step": 13904 }, { "epoch": 0.4911496805584377, "grad_norm": 1.621565818786621, "learning_rate": 5.386008199913905e-06, "loss": 0.8291, "step": 13905 }, { "epoch": 0.49118500236214563, "grad_norm": 0.9174820780754089, "learning_rate": 5.3854378946744624e-06, "loss": 0.6094, "step": 13906 }, { "epoch": 0.49122032416585354, "grad_norm": 1.9985867738723755, "learning_rate": 5.3848675843904805e-06, "loss": 0.8039, "step": 13907 }, { "epoch": 0.49125564596956145, "grad_norm": 1.7429887056350708, "learning_rate": 5.384297269069423e-06, "loss": 0.7773, "step": 13908 }, { "epoch": 0.49129096777326936, "grad_norm": 1.7241692543029785, "learning_rate": 5.383726948718757e-06, "loss": 0.8184, "step": 13909 }, { "epoch": 0.49132628957697727, "grad_norm": 2.3594765663146973, "learning_rate": 5.383156623345945e-06, "loss": 0.8218, "step": 13910 }, { "epoch": 0.4913616113806852, "grad_norm": 1.797372817993164, "learning_rate": 5.38258629295845e-06, "loss": 0.8203, "step": 13911 }, { "epoch": 0.49139693318439304, "grad_norm": 1.898116111755371, "learning_rate": 5.382015957563737e-06, "loss": 0.831, "step": 13912 }, { "epoch": 0.49143225498810095, "grad_norm": 1.8363677263259888, "learning_rate": 5.3814456171692715e-06, "loss": 0.7851, "step": 13913 }, { "epoch": 0.49146757679180886, "grad_norm": 1.6279352903366089, "learning_rate": 5.380875271782519e-06, "loss": 0.8236, "step": 13914 }, { "epoch": 0.49150289859551677, "grad_norm": 1.8542805910110474, "learning_rate": 5.380304921410942e-06, "loss": 0.7872, "step": 13915 }, { "epoch": 0.4915382203992247, "grad_norm": 1.876320481300354, "learning_rate": 5.3797345660620026e-06, "loss": 0.798, "step": 13916 }, { "epoch": 0.4915735422029326, "grad_norm": 3.0068278312683105, "learning_rate": 5.379164205743171e-06, "loss": 0.7963, "step": 13917 }, { "epoch": 0.4916088640066405, "grad_norm": 1.8118470907211304, "learning_rate": 5.37859384046191e-06, "loss": 0.7988, "step": 13918 }, { "epoch": 0.4916441858103484, "grad_norm": 1.9923374652862549, "learning_rate": 5.378023470225682e-06, "loss": 0.7878, "step": 13919 }, { "epoch": 0.4916795076140563, "grad_norm": 1.587959885597229, "learning_rate": 5.3774530950419565e-06, "loss": 0.8155, "step": 13920 }, { "epoch": 0.49171482941776423, "grad_norm": 1.5375877618789673, "learning_rate": 5.3768827149181955e-06, "loss": 0.7758, "step": 13921 }, { "epoch": 0.49175015122147214, "grad_norm": 1.5976606607437134, "learning_rate": 5.3763123298618635e-06, "loss": 0.7869, "step": 13922 }, { "epoch": 0.49178547302518005, "grad_norm": 1.8180351257324219, "learning_rate": 5.375741939880425e-06, "loss": 0.789, "step": 13923 }, { "epoch": 0.49182079482888796, "grad_norm": 1.5944597721099854, "learning_rate": 5.375171544981349e-06, "loss": 0.8129, "step": 13924 }, { "epoch": 0.4918561166325958, "grad_norm": 1.6336454153060913, "learning_rate": 5.374601145172098e-06, "loss": 0.7635, "step": 13925 }, { "epoch": 0.49189143843630373, "grad_norm": 1.8372018337249756, "learning_rate": 5.374030740460137e-06, "loss": 0.8111, "step": 13926 }, { "epoch": 0.49192676024001164, "grad_norm": 1.7576923370361328, "learning_rate": 5.373460330852932e-06, "loss": 0.8345, "step": 13927 }, { "epoch": 0.49196208204371955, "grad_norm": 1.5551728010177612, "learning_rate": 5.372889916357949e-06, "loss": 0.7972, "step": 13928 }, { "epoch": 0.49199740384742746, "grad_norm": 1.5604881048202515, "learning_rate": 5.372319496982651e-06, "loss": 0.7944, "step": 13929 }, { "epoch": 0.49203272565113537, "grad_norm": 1.7710506916046143, "learning_rate": 5.371749072734508e-06, "loss": 0.8159, "step": 13930 }, { "epoch": 0.4920680474548433, "grad_norm": 1.6318697929382324, "learning_rate": 5.371178643620981e-06, "loss": 0.7787, "step": 13931 }, { "epoch": 0.4921033692585512, "grad_norm": 3.295901298522949, "learning_rate": 5.370608209649539e-06, "loss": 0.7982, "step": 13932 }, { "epoch": 0.4921386910622591, "grad_norm": 1.741856575012207, "learning_rate": 5.370037770827647e-06, "loss": 0.7984, "step": 13933 }, { "epoch": 0.492174012865967, "grad_norm": 1.7372020483016968, "learning_rate": 5.369467327162767e-06, "loss": 0.7895, "step": 13934 }, { "epoch": 0.4922093346696749, "grad_norm": 1.8831855058670044, "learning_rate": 5.368896878662371e-06, "loss": 0.7932, "step": 13935 }, { "epoch": 0.49224465647338284, "grad_norm": 1.67094087600708, "learning_rate": 5.368326425333921e-06, "loss": 0.806, "step": 13936 }, { "epoch": 0.49227997827709075, "grad_norm": 1.5985679626464844, "learning_rate": 5.367755967184883e-06, "loss": 0.7628, "step": 13937 }, { "epoch": 0.4923153000807986, "grad_norm": 1.591927409172058, "learning_rate": 5.3671855042227264e-06, "loss": 0.7825, "step": 13938 }, { "epoch": 0.4923506218845065, "grad_norm": 1.6167590618133545, "learning_rate": 5.366615036454914e-06, "loss": 0.7738, "step": 13939 }, { "epoch": 0.4923859436882144, "grad_norm": 1.76580011844635, "learning_rate": 5.366044563888913e-06, "loss": 0.7927, "step": 13940 }, { "epoch": 0.49242126549192233, "grad_norm": 1.7897380590438843, "learning_rate": 5.365474086532188e-06, "loss": 0.824, "step": 13941 }, { "epoch": 0.49245658729563024, "grad_norm": 1.7270532846450806, "learning_rate": 5.3649036043922085e-06, "loss": 0.7696, "step": 13942 }, { "epoch": 0.49249190909933815, "grad_norm": 1.5803208351135254, "learning_rate": 5.364333117476439e-06, "loss": 0.8074, "step": 13943 }, { "epoch": 0.49252723090304606, "grad_norm": 1.5820027589797974, "learning_rate": 5.363762625792346e-06, "loss": 0.7664, "step": 13944 }, { "epoch": 0.492562552706754, "grad_norm": 1.9671663045883179, "learning_rate": 5.363192129347396e-06, "loss": 0.7711, "step": 13945 }, { "epoch": 0.4925978745104619, "grad_norm": 1.8670027256011963, "learning_rate": 5.362621628149055e-06, "loss": 0.7993, "step": 13946 }, { "epoch": 0.4926331963141698, "grad_norm": 2.1286165714263916, "learning_rate": 5.36205112220479e-06, "loss": 0.7623, "step": 13947 }, { "epoch": 0.4926685181178777, "grad_norm": 1.856412649154663, "learning_rate": 5.361480611522071e-06, "loss": 0.8053, "step": 13948 }, { "epoch": 0.4927038399215856, "grad_norm": 1.7123253345489502, "learning_rate": 5.36091009610836e-06, "loss": 0.7947, "step": 13949 }, { "epoch": 0.49273916172529353, "grad_norm": 1.7456897497177124, "learning_rate": 5.360339575971124e-06, "loss": 0.8268, "step": 13950 }, { "epoch": 0.4927744835290014, "grad_norm": 1.8596268892288208, "learning_rate": 5.359769051117834e-06, "loss": 0.8158, "step": 13951 }, { "epoch": 0.4928098053327093, "grad_norm": 1.626186490058899, "learning_rate": 5.359198521555951e-06, "loss": 0.7916, "step": 13952 }, { "epoch": 0.4928451271364172, "grad_norm": 1.7113890647888184, "learning_rate": 5.358627987292948e-06, "loss": 0.8107, "step": 13953 }, { "epoch": 0.4928804489401251, "grad_norm": 1.7338802814483643, "learning_rate": 5.358057448336289e-06, "loss": 0.795, "step": 13954 }, { "epoch": 0.492915770743833, "grad_norm": 1.7070155143737793, "learning_rate": 5.3574869046934395e-06, "loss": 0.8177, "step": 13955 }, { "epoch": 0.49295109254754094, "grad_norm": 1.9430830478668213, "learning_rate": 5.35691635637187e-06, "loss": 0.7982, "step": 13956 }, { "epoch": 0.49298641435124885, "grad_norm": 1.585695505142212, "learning_rate": 5.356345803379047e-06, "loss": 0.761, "step": 13957 }, { "epoch": 0.49302173615495676, "grad_norm": 2.0630042552948, "learning_rate": 5.3557752457224364e-06, "loss": 0.8212, "step": 13958 }, { "epoch": 0.49305705795866467, "grad_norm": 1.838850736618042, "learning_rate": 5.3552046834095075e-06, "loss": 0.8389, "step": 13959 }, { "epoch": 0.4930923797623726, "grad_norm": 1.6885929107666016, "learning_rate": 5.354634116447723e-06, "loss": 0.8143, "step": 13960 }, { "epoch": 0.4931277015660805, "grad_norm": 1.6298764944076538, "learning_rate": 5.354063544844556e-06, "loss": 0.7885, "step": 13961 }, { "epoch": 0.4931630233697884, "grad_norm": 1.685437560081482, "learning_rate": 5.353492968607473e-06, "loss": 0.7814, "step": 13962 }, { "epoch": 0.4931983451734963, "grad_norm": 1.7776364088058472, "learning_rate": 5.35292238774394e-06, "loss": 0.7936, "step": 13963 }, { "epoch": 0.49323366697720417, "grad_norm": 1.7170464992523193, "learning_rate": 5.352351802261424e-06, "loss": 0.7923, "step": 13964 }, { "epoch": 0.4932689887809121, "grad_norm": 1.6361260414123535, "learning_rate": 5.351781212167395e-06, "loss": 0.7809, "step": 13965 }, { "epoch": 0.49330431058462, "grad_norm": 1.6195820569992065, "learning_rate": 5.351210617469318e-06, "loss": 0.7901, "step": 13966 }, { "epoch": 0.4933396323883279, "grad_norm": 1.6360782384872437, "learning_rate": 5.350640018174664e-06, "loss": 0.7654, "step": 13967 }, { "epoch": 0.4933749541920358, "grad_norm": 2.279576063156128, "learning_rate": 5.350069414290899e-06, "loss": 0.8103, "step": 13968 }, { "epoch": 0.4934102759957437, "grad_norm": 1.8065851926803589, "learning_rate": 5.349498805825491e-06, "loss": 0.7993, "step": 13969 }, { "epoch": 0.49344559779945163, "grad_norm": 1.6504526138305664, "learning_rate": 5.34892819278591e-06, "loss": 0.7759, "step": 13970 }, { "epoch": 0.49348091960315954, "grad_norm": 2.1963210105895996, "learning_rate": 5.348357575179619e-06, "loss": 0.7977, "step": 13971 }, { "epoch": 0.49351624140686745, "grad_norm": 1.5658386945724487, "learning_rate": 5.347786953014092e-06, "loss": 0.7897, "step": 13972 }, { "epoch": 0.49355156321057536, "grad_norm": 1.5705350637435913, "learning_rate": 5.347216326296795e-06, "loss": 0.7669, "step": 13973 }, { "epoch": 0.49358688501428327, "grad_norm": 1.9989057779312134, "learning_rate": 5.346645695035194e-06, "loss": 0.7439, "step": 13974 }, { "epoch": 0.4936222068179912, "grad_norm": 1.7198399305343628, "learning_rate": 5.346075059236761e-06, "loss": 0.7585, "step": 13975 }, { "epoch": 0.4936575286216991, "grad_norm": 1.7266201972961426, "learning_rate": 5.3455044189089625e-06, "loss": 0.7854, "step": 13976 }, { "epoch": 0.49369285042540695, "grad_norm": 1.6836973428726196, "learning_rate": 5.344933774059266e-06, "loss": 0.8001, "step": 13977 }, { "epoch": 0.49372817222911486, "grad_norm": 1.5253753662109375, "learning_rate": 5.3443631246951435e-06, "loss": 0.7909, "step": 13978 }, { "epoch": 0.49376349403282277, "grad_norm": 1.6093474626541138, "learning_rate": 5.3437924708240584e-06, "loss": 0.7848, "step": 13979 }, { "epoch": 0.4937988158365307, "grad_norm": 1.6497048139572144, "learning_rate": 5.343221812453483e-06, "loss": 0.8096, "step": 13980 }, { "epoch": 0.4938341376402386, "grad_norm": 1.6797806024551392, "learning_rate": 5.342651149590885e-06, "loss": 0.798, "step": 13981 }, { "epoch": 0.4938694594439465, "grad_norm": 1.8365508317947388, "learning_rate": 5.342080482243733e-06, "loss": 0.7617, "step": 13982 }, { "epoch": 0.4939047812476544, "grad_norm": 0.9683731198310852, "learning_rate": 5.3415098104194975e-06, "loss": 0.592, "step": 13983 }, { "epoch": 0.4939401030513623, "grad_norm": 1.6267958879470825, "learning_rate": 5.340939134125644e-06, "loss": 0.7819, "step": 13984 }, { "epoch": 0.49397542485507023, "grad_norm": 1.910064935684204, "learning_rate": 5.340368453369644e-06, "loss": 0.8336, "step": 13985 }, { "epoch": 0.49401074665877814, "grad_norm": 1.6526743173599243, "learning_rate": 5.339797768158965e-06, "loss": 0.8005, "step": 13986 }, { "epoch": 0.49404606846248605, "grad_norm": 1.5503499507904053, "learning_rate": 5.339227078501078e-06, "loss": 0.7841, "step": 13987 }, { "epoch": 0.49408139026619396, "grad_norm": 1.8127728700637817, "learning_rate": 5.33865638440345e-06, "loss": 0.7883, "step": 13988 }, { "epoch": 0.4941167120699019, "grad_norm": 1.648154377937317, "learning_rate": 5.3380856858735496e-06, "loss": 0.8043, "step": 13989 }, { "epoch": 0.49415203387360973, "grad_norm": 1.6813417673110962, "learning_rate": 5.33751498291885e-06, "loss": 0.8014, "step": 13990 }, { "epoch": 0.49418735567731764, "grad_norm": 1.7420096397399902, "learning_rate": 5.336944275546816e-06, "loss": 0.7835, "step": 13991 }, { "epoch": 0.49422267748102555, "grad_norm": 1.7401379346847534, "learning_rate": 5.336373563764918e-06, "loss": 0.8163, "step": 13992 }, { "epoch": 0.49425799928473346, "grad_norm": 1.8808234930038452, "learning_rate": 5.335802847580628e-06, "loss": 0.8002, "step": 13993 }, { "epoch": 0.4942933210884414, "grad_norm": 1.7541424036026, "learning_rate": 5.3352321270014115e-06, "loss": 0.7641, "step": 13994 }, { "epoch": 0.4943286428921493, "grad_norm": 1.5545555353164673, "learning_rate": 5.334661402034742e-06, "loss": 0.801, "step": 13995 }, { "epoch": 0.4943639646958572, "grad_norm": 1.5783069133758545, "learning_rate": 5.334090672688086e-06, "loss": 0.7873, "step": 13996 }, { "epoch": 0.4943992864995651, "grad_norm": 1.8055020570755005, "learning_rate": 5.333519938968913e-06, "loss": 0.7999, "step": 13997 }, { "epoch": 0.494434608303273, "grad_norm": 1.9205701351165771, "learning_rate": 5.332949200884695e-06, "loss": 0.7975, "step": 13998 }, { "epoch": 0.4944699301069809, "grad_norm": 1.7842342853546143, "learning_rate": 5.3323784584429e-06, "loss": 0.7832, "step": 13999 }, { "epoch": 0.49450525191068884, "grad_norm": 1.5794017314910889, "learning_rate": 5.331807711650998e-06, "loss": 0.7891, "step": 14000 }, { "epoch": 0.49454057371439675, "grad_norm": 1.7028429508209229, "learning_rate": 5.331236960516461e-06, "loss": 0.8231, "step": 14001 }, { "epoch": 0.49457589551810466, "grad_norm": 1.6563371419906616, "learning_rate": 5.330666205046755e-06, "loss": 0.8041, "step": 14002 }, { "epoch": 0.4946112173218125, "grad_norm": 2.335740566253662, "learning_rate": 5.3300954452493516e-06, "loss": 0.8264, "step": 14003 }, { "epoch": 0.4946465391255204, "grad_norm": 1.721465826034546, "learning_rate": 5.3295246811317225e-06, "loss": 0.7937, "step": 14004 }, { "epoch": 0.49468186092922833, "grad_norm": 2.1257684230804443, "learning_rate": 5.328953912701335e-06, "loss": 0.7643, "step": 14005 }, { "epoch": 0.49471718273293624, "grad_norm": 1.723675012588501, "learning_rate": 5.32838313996566e-06, "loss": 0.8119, "step": 14006 }, { "epoch": 0.49475250453664416, "grad_norm": 1.7531872987747192, "learning_rate": 5.32781236293217e-06, "loss": 0.8197, "step": 14007 }, { "epoch": 0.49478782634035207, "grad_norm": 1.6759527921676636, "learning_rate": 5.327241581608333e-06, "loss": 0.8002, "step": 14008 }, { "epoch": 0.49482314814406, "grad_norm": 1.631548285484314, "learning_rate": 5.326670796001618e-06, "loss": 0.8275, "step": 14009 }, { "epoch": 0.4948584699477679, "grad_norm": 1.606974720954895, "learning_rate": 5.326100006119498e-06, "loss": 0.8092, "step": 14010 }, { "epoch": 0.4948937917514758, "grad_norm": 1.679975986480713, "learning_rate": 5.325529211969444e-06, "loss": 0.7753, "step": 14011 }, { "epoch": 0.4949291135551837, "grad_norm": 1.6157208681106567, "learning_rate": 5.324958413558923e-06, "loss": 0.8009, "step": 14012 }, { "epoch": 0.4949644353588916, "grad_norm": 1.698453664779663, "learning_rate": 5.324387610895409e-06, "loss": 0.7614, "step": 14013 }, { "epoch": 0.49499975716259953, "grad_norm": 1.5849874019622803, "learning_rate": 5.3238168039863705e-06, "loss": 0.7891, "step": 14014 }, { "epoch": 0.49503507896630744, "grad_norm": 1.6656066179275513, "learning_rate": 5.323245992839277e-06, "loss": 0.8082, "step": 14015 }, { "epoch": 0.4950704007700153, "grad_norm": 2.0345559120178223, "learning_rate": 5.3226751774616016e-06, "loss": 0.7974, "step": 14016 }, { "epoch": 0.4951057225737232, "grad_norm": 1.631762146949768, "learning_rate": 5.322104357860815e-06, "loss": 0.8043, "step": 14017 }, { "epoch": 0.4951410443774311, "grad_norm": 1.5485002994537354, "learning_rate": 5.321533534044385e-06, "loss": 0.818, "step": 14018 }, { "epoch": 0.495176366181139, "grad_norm": 1.7247847318649292, "learning_rate": 5.320962706019785e-06, "loss": 0.7955, "step": 14019 }, { "epoch": 0.49521168798484694, "grad_norm": 1.8141825199127197, "learning_rate": 5.320391873794487e-06, "loss": 0.7982, "step": 14020 }, { "epoch": 0.49524700978855485, "grad_norm": 1.625972867012024, "learning_rate": 5.319821037375959e-06, "loss": 0.8017, "step": 14021 }, { "epoch": 0.49528233159226276, "grad_norm": 1.816714882850647, "learning_rate": 5.319250196771676e-06, "loss": 0.7877, "step": 14022 }, { "epoch": 0.49531765339597067, "grad_norm": 1.6160860061645508, "learning_rate": 5.318679351989103e-06, "loss": 0.807, "step": 14023 }, { "epoch": 0.4953529751996786, "grad_norm": 1.7083642482757568, "learning_rate": 5.318108503035717e-06, "loss": 0.7856, "step": 14024 }, { "epoch": 0.4953882970033865, "grad_norm": 1.937896966934204, "learning_rate": 5.317537649918986e-06, "loss": 0.7793, "step": 14025 }, { "epoch": 0.4954236188070944, "grad_norm": 1.550157904624939, "learning_rate": 5.316966792646381e-06, "loss": 0.7928, "step": 14026 }, { "epoch": 0.4954589406108023, "grad_norm": 2.2987828254699707, "learning_rate": 5.316395931225375e-06, "loss": 0.7679, "step": 14027 }, { "epoch": 0.4954942624145102, "grad_norm": 1.6992913484573364, "learning_rate": 5.315825065663439e-06, "loss": 0.7901, "step": 14028 }, { "epoch": 0.4955295842182181, "grad_norm": 1.6711968183517456, "learning_rate": 5.315254195968042e-06, "loss": 0.8046, "step": 14029 }, { "epoch": 0.495564906021926, "grad_norm": 1.6216201782226562, "learning_rate": 5.314683322146659e-06, "loss": 0.7774, "step": 14030 }, { "epoch": 0.4956002278256339, "grad_norm": 1.6033251285552979, "learning_rate": 5.31411244420676e-06, "loss": 0.8148, "step": 14031 }, { "epoch": 0.4956355496293418, "grad_norm": 1.5841844081878662, "learning_rate": 5.313541562155816e-06, "loss": 0.7795, "step": 14032 }, { "epoch": 0.4956708714330497, "grad_norm": 1.5749908685684204, "learning_rate": 5.312970676001299e-06, "loss": 0.8048, "step": 14033 }, { "epoch": 0.49570619323675763, "grad_norm": 1.5427258014678955, "learning_rate": 5.31239978575068e-06, "loss": 0.805, "step": 14034 }, { "epoch": 0.49574151504046554, "grad_norm": 1.9234552383422852, "learning_rate": 5.311828891411432e-06, "loss": 0.7957, "step": 14035 }, { "epoch": 0.49577683684417345, "grad_norm": 1.8412487506866455, "learning_rate": 5.311257992991027e-06, "loss": 0.8167, "step": 14036 }, { "epoch": 0.49581215864788136, "grad_norm": 1.6446881294250488, "learning_rate": 5.310687090496933e-06, "loss": 0.8071, "step": 14037 }, { "epoch": 0.4958474804515893, "grad_norm": 1.7161954641342163, "learning_rate": 5.310116183936626e-06, "loss": 0.8388, "step": 14038 }, { "epoch": 0.4958828022552972, "grad_norm": 1.5453284978866577, "learning_rate": 5.309545273317579e-06, "loss": 0.7796, "step": 14039 }, { "epoch": 0.4959181240590051, "grad_norm": 1.6369694471359253, "learning_rate": 5.308974358647258e-06, "loss": 0.7738, "step": 14040 }, { "epoch": 0.495953445862713, "grad_norm": 1.5321002006530762, "learning_rate": 5.308403439933143e-06, "loss": 0.7826, "step": 14041 }, { "epoch": 0.49598876766642086, "grad_norm": 3.1912038326263428, "learning_rate": 5.307832517182699e-06, "loss": 0.7901, "step": 14042 }, { "epoch": 0.49602408947012877, "grad_norm": 1.5567842721939087, "learning_rate": 5.307261590403401e-06, "loss": 0.7764, "step": 14043 }, { "epoch": 0.4960594112738367, "grad_norm": 1.6524235010147095, "learning_rate": 5.306690659602721e-06, "loss": 0.8281, "step": 14044 }, { "epoch": 0.4960947330775446, "grad_norm": 1.6233625411987305, "learning_rate": 5.306119724788131e-06, "loss": 0.8173, "step": 14045 }, { "epoch": 0.4961300548812525, "grad_norm": 1.5984132289886475, "learning_rate": 5.305548785967104e-06, "loss": 0.7864, "step": 14046 }, { "epoch": 0.4961653766849604, "grad_norm": 1.5442055463790894, "learning_rate": 5.304977843147112e-06, "loss": 0.8057, "step": 14047 }, { "epoch": 0.4962006984886683, "grad_norm": 1.610436201095581, "learning_rate": 5.3044068963356275e-06, "loss": 0.7981, "step": 14048 }, { "epoch": 0.49623602029237623, "grad_norm": 1.6081516742706299, "learning_rate": 5.3038359455401235e-06, "loss": 0.7893, "step": 14049 }, { "epoch": 0.49627134209608414, "grad_norm": 1.609920859336853, "learning_rate": 5.303264990768071e-06, "loss": 0.8624, "step": 14050 }, { "epoch": 0.49630666389979206, "grad_norm": 1.7526768445968628, "learning_rate": 5.302694032026943e-06, "loss": 0.7964, "step": 14051 }, { "epoch": 0.49634198570349997, "grad_norm": 1.647939920425415, "learning_rate": 5.302123069324212e-06, "loss": 0.8002, "step": 14052 }, { "epoch": 0.4963773075072079, "grad_norm": 1.5020204782485962, "learning_rate": 5.301552102667352e-06, "loss": 0.8016, "step": 14053 }, { "epoch": 0.4964126293109158, "grad_norm": 1.6547297239303589, "learning_rate": 5.300981132063835e-06, "loss": 0.8058, "step": 14054 }, { "epoch": 0.49644795111462364, "grad_norm": 1.5458062887191772, "learning_rate": 5.300410157521132e-06, "loss": 0.8006, "step": 14055 }, { "epoch": 0.49648327291833155, "grad_norm": 1.7247108221054077, "learning_rate": 5.2998391790467185e-06, "loss": 0.8152, "step": 14056 }, { "epoch": 0.49651859472203946, "grad_norm": 1.7569317817687988, "learning_rate": 5.299268196648066e-06, "loss": 0.8644, "step": 14057 }, { "epoch": 0.4965539165257474, "grad_norm": 1.786502480506897, "learning_rate": 5.298697210332647e-06, "loss": 0.7911, "step": 14058 }, { "epoch": 0.4965892383294553, "grad_norm": 1.8170270919799805, "learning_rate": 5.298126220107937e-06, "loss": 0.8531, "step": 14059 }, { "epoch": 0.4966245601331632, "grad_norm": 1.770383358001709, "learning_rate": 5.297555225981404e-06, "loss": 0.8116, "step": 14060 }, { "epoch": 0.4966598819368711, "grad_norm": 1.618872880935669, "learning_rate": 5.296984227960527e-06, "loss": 0.8059, "step": 14061 }, { "epoch": 0.496695203740579, "grad_norm": 2.208428382873535, "learning_rate": 5.296413226052775e-06, "loss": 0.8283, "step": 14062 }, { "epoch": 0.4967305255442869, "grad_norm": 1.648363709449768, "learning_rate": 5.295842220265622e-06, "loss": 0.8194, "step": 14063 }, { "epoch": 0.49676584734799484, "grad_norm": 1.6819828748703003, "learning_rate": 5.295271210606543e-06, "loss": 0.7886, "step": 14064 }, { "epoch": 0.49680116915170275, "grad_norm": 1.5748201608657837, "learning_rate": 5.294700197083009e-06, "loss": 0.7869, "step": 14065 }, { "epoch": 0.49683649095541066, "grad_norm": 1.5391440391540527, "learning_rate": 5.294129179702494e-06, "loss": 0.7838, "step": 14066 }, { "epoch": 0.49687181275911857, "grad_norm": 1.7040363550186157, "learning_rate": 5.293558158472471e-06, "loss": 0.8197, "step": 14067 }, { "epoch": 0.4969071345628264, "grad_norm": 1.5990322828292847, "learning_rate": 5.292987133400416e-06, "loss": 0.7807, "step": 14068 }, { "epoch": 0.49694245636653434, "grad_norm": 1.7291419506072998, "learning_rate": 5.292416104493799e-06, "loss": 0.8017, "step": 14069 }, { "epoch": 0.49697777817024225, "grad_norm": 1.6567155122756958, "learning_rate": 5.2918450717600965e-06, "loss": 0.78, "step": 14070 }, { "epoch": 0.49701309997395016, "grad_norm": 1.7172318696975708, "learning_rate": 5.291274035206778e-06, "loss": 0.7532, "step": 14071 }, { "epoch": 0.49704842177765807, "grad_norm": 1.715316653251648, "learning_rate": 5.290702994841321e-06, "loss": 0.7801, "step": 14072 }, { "epoch": 0.497083743581366, "grad_norm": 1.650633692741394, "learning_rate": 5.290131950671199e-06, "loss": 0.794, "step": 14073 }, { "epoch": 0.4971190653850739, "grad_norm": 1.9541105031967163, "learning_rate": 5.289560902703883e-06, "loss": 0.8172, "step": 14074 }, { "epoch": 0.4971543871887818, "grad_norm": 0.9097116589546204, "learning_rate": 5.288989850946848e-06, "loss": 0.5689, "step": 14075 }, { "epoch": 0.4971897089924897, "grad_norm": 1.540520429611206, "learning_rate": 5.288418795407569e-06, "loss": 0.7572, "step": 14076 }, { "epoch": 0.4972250307961976, "grad_norm": 1.6449854373931885, "learning_rate": 5.287847736093519e-06, "loss": 0.7934, "step": 14077 }, { "epoch": 0.49726035259990553, "grad_norm": 1.5486055612564087, "learning_rate": 5.287276673012174e-06, "loss": 0.8279, "step": 14078 }, { "epoch": 0.49729567440361344, "grad_norm": 1.6290420293807983, "learning_rate": 5.286705606171003e-06, "loss": 0.7934, "step": 14079 }, { "epoch": 0.49733099620732135, "grad_norm": 1.8657019138336182, "learning_rate": 5.286134535577484e-06, "loss": 0.8039, "step": 14080 }, { "epoch": 0.4973663180110292, "grad_norm": 1.6449331045150757, "learning_rate": 5.285563461239088e-06, "loss": 0.8004, "step": 14081 }, { "epoch": 0.4974016398147371, "grad_norm": 1.6069262027740479, "learning_rate": 5.284992383163295e-06, "loss": 0.8385, "step": 14082 }, { "epoch": 0.49743696161844503, "grad_norm": 1.7672550678253174, "learning_rate": 5.284421301357571e-06, "loss": 0.7989, "step": 14083 }, { "epoch": 0.49747228342215294, "grad_norm": 1.5465290546417236, "learning_rate": 5.283850215829397e-06, "loss": 0.7773, "step": 14084 }, { "epoch": 0.49750760522586085, "grad_norm": 1.7339938879013062, "learning_rate": 5.283279126586245e-06, "loss": 0.8082, "step": 14085 }, { "epoch": 0.49754292702956876, "grad_norm": 1.5122720003128052, "learning_rate": 5.282708033635588e-06, "loss": 0.796, "step": 14086 }, { "epoch": 0.49757824883327667, "grad_norm": 1.632304310798645, "learning_rate": 5.282136936984903e-06, "loss": 0.7953, "step": 14087 }, { "epoch": 0.4976135706369846, "grad_norm": 1.626634120941162, "learning_rate": 5.281565836641661e-06, "loss": 0.7839, "step": 14088 }, { "epoch": 0.4976488924406925, "grad_norm": 1.5303093194961548, "learning_rate": 5.280994732613338e-06, "loss": 0.7437, "step": 14089 }, { "epoch": 0.4976842142444004, "grad_norm": 1.7682085037231445, "learning_rate": 5.280423624907411e-06, "loss": 0.7674, "step": 14090 }, { "epoch": 0.4977195360481083, "grad_norm": 1.5631109476089478, "learning_rate": 5.279852513531351e-06, "loss": 0.781, "step": 14091 }, { "epoch": 0.4977548578518162, "grad_norm": 1.6208776235580444, "learning_rate": 5.279281398492633e-06, "loss": 0.8155, "step": 14092 }, { "epoch": 0.49779017965552413, "grad_norm": 1.5985651016235352, "learning_rate": 5.278710279798733e-06, "loss": 0.7648, "step": 14093 }, { "epoch": 0.497825501459232, "grad_norm": 1.8487679958343506, "learning_rate": 5.278139157457126e-06, "loss": 0.7944, "step": 14094 }, { "epoch": 0.4978608232629399, "grad_norm": 1.6955586671829224, "learning_rate": 5.277568031475285e-06, "loss": 0.8251, "step": 14095 }, { "epoch": 0.4978961450666478, "grad_norm": 1.9468547105789185, "learning_rate": 5.276996901860688e-06, "loss": 0.8225, "step": 14096 }, { "epoch": 0.4979314668703557, "grad_norm": 1.6283810138702393, "learning_rate": 5.276425768620805e-06, "loss": 0.7869, "step": 14097 }, { "epoch": 0.49796678867406363, "grad_norm": 1.7072728872299194, "learning_rate": 5.275854631763114e-06, "loss": 0.8483, "step": 14098 }, { "epoch": 0.49800211047777154, "grad_norm": 1.813634991645813, "learning_rate": 5.2752834912950915e-06, "loss": 0.7619, "step": 14099 }, { "epoch": 0.49803743228147945, "grad_norm": 1.6768879890441895, "learning_rate": 5.274712347224208e-06, "loss": 0.7985, "step": 14100 }, { "epoch": 0.49807275408518736, "grad_norm": 1.781773328781128, "learning_rate": 5.2741411995579415e-06, "loss": 0.8143, "step": 14101 }, { "epoch": 0.4981080758888953, "grad_norm": 2.3348228931427, "learning_rate": 5.273570048303768e-06, "loss": 0.801, "step": 14102 }, { "epoch": 0.4981433976926032, "grad_norm": 1.632857322692871, "learning_rate": 5.272998893469159e-06, "loss": 0.7976, "step": 14103 }, { "epoch": 0.4981787194963111, "grad_norm": 1.5792323350906372, "learning_rate": 5.272427735061593e-06, "loss": 0.7418, "step": 14104 }, { "epoch": 0.498214041300019, "grad_norm": 1.591458797454834, "learning_rate": 5.271856573088543e-06, "loss": 0.7847, "step": 14105 }, { "epoch": 0.4982493631037269, "grad_norm": 1.7794158458709717, "learning_rate": 5.271285407557487e-06, "loss": 0.7862, "step": 14106 }, { "epoch": 0.49828468490743477, "grad_norm": 1.7255356311798096, "learning_rate": 5.270714238475898e-06, "loss": 0.7907, "step": 14107 }, { "epoch": 0.4983200067111427, "grad_norm": 1.6724299192428589, "learning_rate": 5.270143065851251e-06, "loss": 0.7879, "step": 14108 }, { "epoch": 0.4983553285148506, "grad_norm": 1.550389051437378, "learning_rate": 5.269571889691023e-06, "loss": 0.7768, "step": 14109 }, { "epoch": 0.4983906503185585, "grad_norm": 1.6399282217025757, "learning_rate": 5.269000710002688e-06, "loss": 0.7973, "step": 14110 }, { "epoch": 0.4984259721222664, "grad_norm": 1.6411470174789429, "learning_rate": 5.268429526793722e-06, "loss": 0.7841, "step": 14111 }, { "epoch": 0.4984612939259743, "grad_norm": 1.6611518859863281, "learning_rate": 5.267858340071603e-06, "loss": 0.8068, "step": 14112 }, { "epoch": 0.49849661572968224, "grad_norm": 2.210784435272217, "learning_rate": 5.267287149843803e-06, "loss": 0.78, "step": 14113 }, { "epoch": 0.49853193753339015, "grad_norm": 1.4901471138000488, "learning_rate": 5.266715956117798e-06, "loss": 0.8083, "step": 14114 }, { "epoch": 0.49856725933709806, "grad_norm": 1.6800819635391235, "learning_rate": 5.266144758901067e-06, "loss": 0.7957, "step": 14115 }, { "epoch": 0.49860258114080597, "grad_norm": 1.5378808975219727, "learning_rate": 5.265573558201082e-06, "loss": 0.7869, "step": 14116 }, { "epoch": 0.4986379029445139, "grad_norm": 1.4729362726211548, "learning_rate": 5.2650023540253215e-06, "loss": 0.7555, "step": 14117 }, { "epoch": 0.4986732247482218, "grad_norm": 1.7746576070785522, "learning_rate": 5.264431146381258e-06, "loss": 0.819, "step": 14118 }, { "epoch": 0.4987085465519297, "grad_norm": 1.8043946027755737, "learning_rate": 5.2638599352763705e-06, "loss": 0.8253, "step": 14119 }, { "epoch": 0.49874386835563755, "grad_norm": 1.7087860107421875, "learning_rate": 5.263288720718134e-06, "loss": 0.8107, "step": 14120 }, { "epoch": 0.49877919015934546, "grad_norm": 1.9216346740722656, "learning_rate": 5.262717502714023e-06, "loss": 0.7992, "step": 14121 }, { "epoch": 0.4988145119630534, "grad_norm": 1.6052145957946777, "learning_rate": 5.262146281271516e-06, "loss": 0.7982, "step": 14122 }, { "epoch": 0.4988498337667613, "grad_norm": 1.6076252460479736, "learning_rate": 5.261575056398088e-06, "loss": 0.799, "step": 14123 }, { "epoch": 0.4988851555704692, "grad_norm": 1.7222535610198975, "learning_rate": 5.2610038281012145e-06, "loss": 0.798, "step": 14124 }, { "epoch": 0.4989204773741771, "grad_norm": 1.622959852218628, "learning_rate": 5.260432596388372e-06, "loss": 0.7871, "step": 14125 }, { "epoch": 0.498955799177885, "grad_norm": 1.599189043045044, "learning_rate": 5.259861361267035e-06, "loss": 0.7958, "step": 14126 }, { "epoch": 0.49899112098159293, "grad_norm": 1.6186891794204712, "learning_rate": 5.259290122744683e-06, "loss": 0.7924, "step": 14127 }, { "epoch": 0.49902644278530084, "grad_norm": 1.6007311344146729, "learning_rate": 5.2587188808287904e-06, "loss": 0.7641, "step": 14128 }, { "epoch": 0.49906176458900875, "grad_norm": 1.6251438856124878, "learning_rate": 5.258147635526832e-06, "loss": 0.8001, "step": 14129 }, { "epoch": 0.49909708639271666, "grad_norm": 1.6058788299560547, "learning_rate": 5.257576386846288e-06, "loss": 0.7722, "step": 14130 }, { "epoch": 0.49913240819642457, "grad_norm": 1.054908037185669, "learning_rate": 5.257005134794633e-06, "loss": 0.5881, "step": 14131 }, { "epoch": 0.4991677300001325, "grad_norm": 1.6076807975769043, "learning_rate": 5.256433879379341e-06, "loss": 0.7905, "step": 14132 }, { "epoch": 0.49920305180384034, "grad_norm": 1.5001229047775269, "learning_rate": 5.255862620607893e-06, "loss": 0.788, "step": 14133 }, { "epoch": 0.49923837360754825, "grad_norm": 1.5917060375213623, "learning_rate": 5.255291358487762e-06, "loss": 0.8065, "step": 14134 }, { "epoch": 0.49927369541125616, "grad_norm": 2.350759506225586, "learning_rate": 5.2547200930264255e-06, "loss": 0.8198, "step": 14135 }, { "epoch": 0.49930901721496407, "grad_norm": 1.8336092233657837, "learning_rate": 5.254148824231361e-06, "loss": 0.8044, "step": 14136 }, { "epoch": 0.499344339018672, "grad_norm": 1.0498734712600708, "learning_rate": 5.253577552110043e-06, "loss": 0.6091, "step": 14137 }, { "epoch": 0.4993796608223799, "grad_norm": 1.624182105064392, "learning_rate": 5.25300627666995e-06, "loss": 0.7979, "step": 14138 }, { "epoch": 0.4994149826260878, "grad_norm": 1.6973021030426025, "learning_rate": 5.252434997918559e-06, "loss": 0.7917, "step": 14139 }, { "epoch": 0.4994503044297957, "grad_norm": 1.668573021888733, "learning_rate": 5.251863715863345e-06, "loss": 0.7916, "step": 14140 }, { "epoch": 0.4994856262335036, "grad_norm": 1.586058497428894, "learning_rate": 5.251292430511788e-06, "loss": 0.7888, "step": 14141 }, { "epoch": 0.49952094803721153, "grad_norm": 1.6360403299331665, "learning_rate": 5.250721141871363e-06, "loss": 0.8537, "step": 14142 }, { "epoch": 0.49955626984091944, "grad_norm": 1.540163516998291, "learning_rate": 5.250149849949547e-06, "loss": 0.7941, "step": 14143 }, { "epoch": 0.49959159164462735, "grad_norm": 1.8062090873718262, "learning_rate": 5.249578554753814e-06, "loss": 0.8126, "step": 14144 }, { "epoch": 0.49962691344833526, "grad_norm": 1.6604275703430176, "learning_rate": 5.249007256291646e-06, "loss": 0.8152, "step": 14145 }, { "epoch": 0.4996622352520431, "grad_norm": 1.6810063123703003, "learning_rate": 5.2484359545705176e-06, "loss": 0.8078, "step": 14146 }, { "epoch": 0.49969755705575103, "grad_norm": 1.7261966466903687, "learning_rate": 5.247864649597905e-06, "loss": 0.8158, "step": 14147 }, { "epoch": 0.49973287885945894, "grad_norm": 1.9051932096481323, "learning_rate": 5.247293341381288e-06, "loss": 0.8048, "step": 14148 }, { "epoch": 0.49976820066316685, "grad_norm": 1.5328577756881714, "learning_rate": 5.246722029928141e-06, "loss": 0.7859, "step": 14149 }, { "epoch": 0.49980352246687476, "grad_norm": 2.5082929134368896, "learning_rate": 5.246150715245943e-06, "loss": 0.8068, "step": 14150 }, { "epoch": 0.49983884427058267, "grad_norm": 1.8546937704086304, "learning_rate": 5.24557939734217e-06, "loss": 0.8063, "step": 14151 }, { "epoch": 0.4998741660742906, "grad_norm": 1.7438241243362427, "learning_rate": 5.245008076224302e-06, "loss": 0.8098, "step": 14152 }, { "epoch": 0.4999094878779985, "grad_norm": 1.8191345930099487, "learning_rate": 5.2444367518998126e-06, "loss": 0.7782, "step": 14153 }, { "epoch": 0.4999448096817064, "grad_norm": 1.8032222986221313, "learning_rate": 5.243865424376182e-06, "loss": 0.8589, "step": 14154 }, { "epoch": 0.4999801314854143, "grad_norm": 1.8960496187210083, "learning_rate": 5.243294093660885e-06, "loss": 0.8274, "step": 14155 }, { "epoch": 0.5000154532891222, "grad_norm": 1.8202165365219116, "learning_rate": 5.242722759761402e-06, "loss": 0.8074, "step": 14156 }, { "epoch": 0.5000507750928301, "grad_norm": 1.7973580360412598, "learning_rate": 5.242151422685209e-06, "loss": 0.8175, "step": 14157 }, { "epoch": 0.500086096896538, "grad_norm": 1.678395390510559, "learning_rate": 5.241580082439782e-06, "loss": 0.8075, "step": 14158 }, { "epoch": 0.500121418700246, "grad_norm": 2.0335729122161865, "learning_rate": 5.241008739032602e-06, "loss": 0.8038, "step": 14159 }, { "epoch": 0.5001567405039539, "grad_norm": 1.567535161972046, "learning_rate": 5.240437392471145e-06, "loss": 0.8054, "step": 14160 }, { "epoch": 0.5001920623076618, "grad_norm": 1.6713221073150635, "learning_rate": 5.2398660427628886e-06, "loss": 0.7988, "step": 14161 }, { "epoch": 0.5002273841113697, "grad_norm": 1.5783501863479614, "learning_rate": 5.239294689915311e-06, "loss": 0.7673, "step": 14162 }, { "epoch": 0.5002627059150776, "grad_norm": 1.600692868232727, "learning_rate": 5.238723333935888e-06, "loss": 0.8102, "step": 14163 }, { "epoch": 0.5002980277187855, "grad_norm": 1.6638150215148926, "learning_rate": 5.2381519748320995e-06, "loss": 0.7949, "step": 14164 }, { "epoch": 0.5003333495224934, "grad_norm": 1.693357229232788, "learning_rate": 5.237580612611424e-06, "loss": 0.8049, "step": 14165 }, { "epoch": 0.5003686713262012, "grad_norm": 1.7823821306228638, "learning_rate": 5.2370092472813346e-06, "loss": 0.7869, "step": 14166 }, { "epoch": 0.5004039931299091, "grad_norm": 1.6890270709991455, "learning_rate": 5.2364378788493165e-06, "loss": 0.7833, "step": 14167 }, { "epoch": 0.500439314933617, "grad_norm": 1.4585216045379639, "learning_rate": 5.2358665073228405e-06, "loss": 0.8065, "step": 14168 }, { "epoch": 0.500474636737325, "grad_norm": 1.5263574123382568, "learning_rate": 5.23529513270939e-06, "loss": 0.817, "step": 14169 }, { "epoch": 0.5005099585410329, "grad_norm": 1.4932094812393188, "learning_rate": 5.234723755016443e-06, "loss": 0.7861, "step": 14170 }, { "epoch": 0.5005452803447408, "grad_norm": 1.6957589387893677, "learning_rate": 5.234152374251473e-06, "loss": 0.8018, "step": 14171 }, { "epoch": 0.5005806021484487, "grad_norm": 1.6287977695465088, "learning_rate": 5.233580990421961e-06, "loss": 0.7988, "step": 14172 }, { "epoch": 0.5006159239521566, "grad_norm": 1.7145256996154785, "learning_rate": 5.233009603535385e-06, "loss": 0.8321, "step": 14173 }, { "epoch": 0.5006512457558645, "grad_norm": 1.6696950197219849, "learning_rate": 5.232438213599224e-06, "loss": 0.8171, "step": 14174 }, { "epoch": 0.5006865675595724, "grad_norm": 1.6129953861236572, "learning_rate": 5.231866820620954e-06, "loss": 0.772, "step": 14175 }, { "epoch": 0.5007218893632803, "grad_norm": 1.7926487922668457, "learning_rate": 5.231295424608055e-06, "loss": 0.7914, "step": 14176 }, { "epoch": 0.5007572111669882, "grad_norm": 1.736444354057312, "learning_rate": 5.230724025568006e-06, "loss": 0.7947, "step": 14177 }, { "epoch": 0.5007925329706961, "grad_norm": 1.6785424947738647, "learning_rate": 5.230152623508283e-06, "loss": 0.7874, "step": 14178 }, { "epoch": 0.5008278547744041, "grad_norm": 2.4091551303863525, "learning_rate": 5.229581218436367e-06, "loss": 0.8115, "step": 14179 }, { "epoch": 0.500863176578112, "grad_norm": 1.7305079698562622, "learning_rate": 5.229009810359734e-06, "loss": 0.816, "step": 14180 }, { "epoch": 0.5008984983818199, "grad_norm": 1.6396499872207642, "learning_rate": 5.228438399285863e-06, "loss": 0.8012, "step": 14181 }, { "epoch": 0.5009338201855278, "grad_norm": 2.063730239868164, "learning_rate": 5.2278669852222356e-06, "loss": 0.8095, "step": 14182 }, { "epoch": 0.5009691419892357, "grad_norm": 1.5331608057022095, "learning_rate": 5.227295568176327e-06, "loss": 0.8014, "step": 14183 }, { "epoch": 0.5010044637929436, "grad_norm": 1.89895498752594, "learning_rate": 5.226724148155615e-06, "loss": 0.7982, "step": 14184 }, { "epoch": 0.5010397855966515, "grad_norm": 1.615934133529663, "learning_rate": 5.226152725167582e-06, "loss": 0.7746, "step": 14185 }, { "epoch": 0.5010751074003594, "grad_norm": 1.6902772188186646, "learning_rate": 5.225581299219704e-06, "loss": 0.8082, "step": 14186 }, { "epoch": 0.5011104292040673, "grad_norm": 1.6380884647369385, "learning_rate": 5.22500987031946e-06, "loss": 0.7747, "step": 14187 }, { "epoch": 0.5011457510077753, "grad_norm": 1.6417866945266724, "learning_rate": 5.224438438474329e-06, "loss": 0.7861, "step": 14188 }, { "epoch": 0.5011810728114832, "grad_norm": 1.61549973487854, "learning_rate": 5.223867003691791e-06, "loss": 0.7664, "step": 14189 }, { "epoch": 0.5012163946151911, "grad_norm": 1.6079601049423218, "learning_rate": 5.223295565979324e-06, "loss": 0.782, "step": 14190 }, { "epoch": 0.501251716418899, "grad_norm": 1.8720113039016724, "learning_rate": 5.222724125344405e-06, "loss": 0.7821, "step": 14191 }, { "epoch": 0.5012870382226068, "grad_norm": 1.6207386255264282, "learning_rate": 5.222152681794515e-06, "loss": 0.8116, "step": 14192 }, { "epoch": 0.5013223600263147, "grad_norm": 2.2101638317108154, "learning_rate": 5.2215812353371334e-06, "loss": 0.7858, "step": 14193 }, { "epoch": 0.5013576818300226, "grad_norm": 0.9007580280303955, "learning_rate": 5.2210097859797375e-06, "loss": 0.565, "step": 14194 }, { "epoch": 0.5013930036337305, "grad_norm": 1.6502513885498047, "learning_rate": 5.220438333729806e-06, "loss": 0.7981, "step": 14195 }, { "epoch": 0.5014283254374384, "grad_norm": 1.790270209312439, "learning_rate": 5.2198668785948205e-06, "loss": 0.8028, "step": 14196 }, { "epoch": 0.5014636472411463, "grad_norm": 1.631423830986023, "learning_rate": 5.219295420582257e-06, "loss": 0.8361, "step": 14197 }, { "epoch": 0.5014989690448542, "grad_norm": 1.5998315811157227, "learning_rate": 5.2187239596996e-06, "loss": 0.7939, "step": 14198 }, { "epoch": 0.5015342908485622, "grad_norm": 1.775217890739441, "learning_rate": 5.218152495954322e-06, "loss": 0.8017, "step": 14199 }, { "epoch": 0.5015696126522701, "grad_norm": 1.5809515714645386, "learning_rate": 5.217581029353904e-06, "loss": 0.7734, "step": 14200 }, { "epoch": 0.501604934455978, "grad_norm": 1.6167051792144775, "learning_rate": 5.217009559905829e-06, "loss": 0.7852, "step": 14201 }, { "epoch": 0.5016402562596859, "grad_norm": 1.8030539751052856, "learning_rate": 5.216438087617571e-06, "loss": 0.8245, "step": 14202 }, { "epoch": 0.5016755780633938, "grad_norm": 2.169666290283203, "learning_rate": 5.215866612496615e-06, "loss": 0.8395, "step": 14203 }, { "epoch": 0.5017108998671017, "grad_norm": 1.7949650287628174, "learning_rate": 5.215295134550435e-06, "loss": 0.7949, "step": 14204 }, { "epoch": 0.5017462216708096, "grad_norm": 1.7924513816833496, "learning_rate": 5.2147236537865145e-06, "loss": 0.7907, "step": 14205 }, { "epoch": 0.5017815434745175, "grad_norm": 1.6447830200195312, "learning_rate": 5.21415217021233e-06, "loss": 0.8082, "step": 14206 }, { "epoch": 0.5018168652782254, "grad_norm": 1.6404097080230713, "learning_rate": 5.213580683835363e-06, "loss": 0.7899, "step": 14207 }, { "epoch": 0.5018521870819334, "grad_norm": 1.6135345697402954, "learning_rate": 5.213009194663092e-06, "loss": 0.797, "step": 14208 }, { "epoch": 0.5018875088856413, "grad_norm": 2.8154354095458984, "learning_rate": 5.212437702702997e-06, "loss": 0.8044, "step": 14209 }, { "epoch": 0.5019228306893492, "grad_norm": 1.977262258529663, "learning_rate": 5.211866207962556e-06, "loss": 0.8041, "step": 14210 }, { "epoch": 0.5019581524930571, "grad_norm": 1.5617371797561646, "learning_rate": 5.21129471044925e-06, "loss": 0.7878, "step": 14211 }, { "epoch": 0.501993474296765, "grad_norm": 1.668093204498291, "learning_rate": 5.2107232101705595e-06, "loss": 0.8278, "step": 14212 }, { "epoch": 0.5020287961004729, "grad_norm": 1.645056128501892, "learning_rate": 5.2101517071339624e-06, "loss": 0.8106, "step": 14213 }, { "epoch": 0.5020641179041808, "grad_norm": 1.603251576423645, "learning_rate": 5.20958020134694e-06, "loss": 0.8096, "step": 14214 }, { "epoch": 0.5020994397078887, "grad_norm": 1.6770520210266113, "learning_rate": 5.209008692816971e-06, "loss": 0.7736, "step": 14215 }, { "epoch": 0.5021347615115966, "grad_norm": 1.8567918539047241, "learning_rate": 5.208437181551536e-06, "loss": 0.798, "step": 14216 }, { "epoch": 0.5021700833153045, "grad_norm": 1.7294329404830933, "learning_rate": 5.207865667558114e-06, "loss": 0.7978, "step": 14217 }, { "epoch": 0.5022054051190123, "grad_norm": 1.8649665117263794, "learning_rate": 5.2072941508441855e-06, "loss": 0.8312, "step": 14218 }, { "epoch": 0.5022407269227203, "grad_norm": 1.6508197784423828, "learning_rate": 5.206722631417229e-06, "loss": 0.8023, "step": 14219 }, { "epoch": 0.5022760487264282, "grad_norm": 1.8283190727233887, "learning_rate": 5.206151109284727e-06, "loss": 0.7782, "step": 14220 }, { "epoch": 0.5023113705301361, "grad_norm": 1.7427397966384888, "learning_rate": 5.2055795844541564e-06, "loss": 0.786, "step": 14221 }, { "epoch": 0.502346692333844, "grad_norm": 1.7727123498916626, "learning_rate": 5.205008056933001e-06, "loss": 0.8193, "step": 14222 }, { "epoch": 0.5023820141375519, "grad_norm": 1.8209174871444702, "learning_rate": 5.2044365267287366e-06, "loss": 0.7986, "step": 14223 }, { "epoch": 0.5024173359412598, "grad_norm": 1.5712966918945312, "learning_rate": 5.203864993848846e-06, "loss": 0.7811, "step": 14224 }, { "epoch": 0.5024526577449677, "grad_norm": 1.5872316360473633, "learning_rate": 5.203293458300809e-06, "loss": 0.7816, "step": 14225 }, { "epoch": 0.5024879795486756, "grad_norm": 1.877292513847351, "learning_rate": 5.2027219200921064e-06, "loss": 0.8023, "step": 14226 }, { "epoch": 0.5025233013523835, "grad_norm": 1.853056788444519, "learning_rate": 5.202150379230216e-06, "loss": 0.7937, "step": 14227 }, { "epoch": 0.5025586231560915, "grad_norm": 1.592677354812622, "learning_rate": 5.201578835722621e-06, "loss": 0.8283, "step": 14228 }, { "epoch": 0.5025939449597994, "grad_norm": 1.782723307609558, "learning_rate": 5.201007289576797e-06, "loss": 0.7994, "step": 14229 }, { "epoch": 0.5026292667635073, "grad_norm": 1.628448247909546, "learning_rate": 5.20043574080023e-06, "loss": 0.7938, "step": 14230 }, { "epoch": 0.5026645885672152, "grad_norm": 1.7243926525115967, "learning_rate": 5.199864189400396e-06, "loss": 0.7736, "step": 14231 }, { "epoch": 0.5026999103709231, "grad_norm": 1.6399216651916504, "learning_rate": 5.199292635384778e-06, "loss": 0.8002, "step": 14232 }, { "epoch": 0.502735232174631, "grad_norm": 1.6512718200683594, "learning_rate": 5.198721078760857e-06, "loss": 0.7726, "step": 14233 }, { "epoch": 0.5027705539783389, "grad_norm": 1.9094473123550415, "learning_rate": 5.198149519536108e-06, "loss": 0.8265, "step": 14234 }, { "epoch": 0.5028058757820468, "grad_norm": 1.6650094985961914, "learning_rate": 5.197577957718021e-06, "loss": 0.7971, "step": 14235 }, { "epoch": 0.5028411975857547, "grad_norm": 1.6699774265289307, "learning_rate": 5.197006393314066e-06, "loss": 0.7935, "step": 14236 }, { "epoch": 0.5028765193894627, "grad_norm": 1.6839486360549927, "learning_rate": 5.19643482633173e-06, "loss": 0.7849, "step": 14237 }, { "epoch": 0.5029118411931706, "grad_norm": 1.6343423128128052, "learning_rate": 5.195863256778493e-06, "loss": 0.8043, "step": 14238 }, { "epoch": 0.5029471629968785, "grad_norm": 1.6311780214309692, "learning_rate": 5.195291684661832e-06, "loss": 0.7879, "step": 14239 }, { "epoch": 0.5029824848005864, "grad_norm": 1.5010868310928345, "learning_rate": 5.1947201099892316e-06, "loss": 0.7617, "step": 14240 }, { "epoch": 0.5030178066042943, "grad_norm": 1.7536821365356445, "learning_rate": 5.194148532768171e-06, "loss": 0.7941, "step": 14241 }, { "epoch": 0.5030531284080022, "grad_norm": 1.8588292598724365, "learning_rate": 5.193576953006129e-06, "loss": 0.8179, "step": 14242 }, { "epoch": 0.5030884502117101, "grad_norm": 1.717625379562378, "learning_rate": 5.193005370710591e-06, "loss": 0.8166, "step": 14243 }, { "epoch": 0.5031237720154179, "grad_norm": 1.538787603378296, "learning_rate": 5.192433785889035e-06, "loss": 0.7807, "step": 14244 }, { "epoch": 0.5031590938191258, "grad_norm": 1.696719765663147, "learning_rate": 5.1918621985489405e-06, "loss": 0.7932, "step": 14245 }, { "epoch": 0.5031944156228337, "grad_norm": 1.548407793045044, "learning_rate": 5.191290608697791e-06, "loss": 0.7859, "step": 14246 }, { "epoch": 0.5032297374265416, "grad_norm": 1.740170955657959, "learning_rate": 5.190719016343063e-06, "loss": 0.8085, "step": 14247 }, { "epoch": 0.5032650592302496, "grad_norm": 1.5666359663009644, "learning_rate": 5.1901474214922435e-06, "loss": 0.789, "step": 14248 }, { "epoch": 0.5033003810339575, "grad_norm": 1.8082903623580933, "learning_rate": 5.18957582415281e-06, "loss": 0.8109, "step": 14249 }, { "epoch": 0.5033357028376654, "grad_norm": 1.668599247932434, "learning_rate": 5.189004224332242e-06, "loss": 0.7948, "step": 14250 }, { "epoch": 0.5033710246413733, "grad_norm": 1.7294692993164062, "learning_rate": 5.188432622038022e-06, "loss": 0.7638, "step": 14251 }, { "epoch": 0.5034063464450812, "grad_norm": 1.5647746324539185, "learning_rate": 5.187861017277633e-06, "loss": 0.7828, "step": 14252 }, { "epoch": 0.5034416682487891, "grad_norm": 1.6191580295562744, "learning_rate": 5.1872894100585535e-06, "loss": 0.781, "step": 14253 }, { "epoch": 0.503476990052497, "grad_norm": 1.6267459392547607, "learning_rate": 5.1867178003882655e-06, "loss": 0.7962, "step": 14254 }, { "epoch": 0.5035123118562049, "grad_norm": 1.702497959136963, "learning_rate": 5.18614618827425e-06, "loss": 0.7966, "step": 14255 }, { "epoch": 0.5035476336599128, "grad_norm": 1.7401204109191895, "learning_rate": 5.185574573723988e-06, "loss": 0.7767, "step": 14256 }, { "epoch": 0.5035829554636208, "grad_norm": 1.8114042282104492, "learning_rate": 5.1850029567449626e-06, "loss": 0.7703, "step": 14257 }, { "epoch": 0.5036182772673287, "grad_norm": 2.051931381225586, "learning_rate": 5.18443133734465e-06, "loss": 0.7884, "step": 14258 }, { "epoch": 0.5036535990710366, "grad_norm": 1.4995588064193726, "learning_rate": 5.183859715530537e-06, "loss": 0.8076, "step": 14259 }, { "epoch": 0.5036889208747445, "grad_norm": 1.6551483869552612, "learning_rate": 5.183288091310101e-06, "loss": 0.847, "step": 14260 }, { "epoch": 0.5037242426784524, "grad_norm": 1.9714536666870117, "learning_rate": 5.1827164646908265e-06, "loss": 0.7718, "step": 14261 }, { "epoch": 0.5037595644821603, "grad_norm": 1.675320029258728, "learning_rate": 5.182144835680194e-06, "loss": 0.8211, "step": 14262 }, { "epoch": 0.5037948862858682, "grad_norm": 1.968787670135498, "learning_rate": 5.1815732042856825e-06, "loss": 0.813, "step": 14263 }, { "epoch": 0.5038302080895761, "grad_norm": 1.8337613344192505, "learning_rate": 5.181001570514775e-06, "loss": 0.8195, "step": 14264 }, { "epoch": 0.503865529893284, "grad_norm": 1.6401704549789429, "learning_rate": 5.1804299343749535e-06, "loss": 0.8023, "step": 14265 }, { "epoch": 0.503900851696992, "grad_norm": 2.6013853549957275, "learning_rate": 5.1798582958736986e-06, "loss": 0.7989, "step": 14266 }, { "epoch": 0.5039361735006999, "grad_norm": 1.6809334754943848, "learning_rate": 5.1792866550184925e-06, "loss": 0.8045, "step": 14267 }, { "epoch": 0.5039714953044078, "grad_norm": 1.7757054567337036, "learning_rate": 5.178715011816816e-06, "loss": 0.792, "step": 14268 }, { "epoch": 0.5040068171081157, "grad_norm": 1.6109380722045898, "learning_rate": 5.178143366276152e-06, "loss": 0.8249, "step": 14269 }, { "epoch": 0.5040421389118235, "grad_norm": 1.560304045677185, "learning_rate": 5.17757171840398e-06, "loss": 0.79, "step": 14270 }, { "epoch": 0.5040774607155314, "grad_norm": 1.0687954425811768, "learning_rate": 5.177000068207783e-06, "loss": 0.6024, "step": 14271 }, { "epoch": 0.5041127825192393, "grad_norm": 1.5037384033203125, "learning_rate": 5.176428415695043e-06, "loss": 0.7879, "step": 14272 }, { "epoch": 0.5041481043229472, "grad_norm": 1.6871552467346191, "learning_rate": 5.17585676087324e-06, "loss": 0.812, "step": 14273 }, { "epoch": 0.5041834261266551, "grad_norm": 1.5311671495437622, "learning_rate": 5.175285103749857e-06, "loss": 0.8059, "step": 14274 }, { "epoch": 0.504218747930363, "grad_norm": 1.7532703876495361, "learning_rate": 5.174713444332376e-06, "loss": 0.7885, "step": 14275 }, { "epoch": 0.5042540697340709, "grad_norm": 1.6262856721878052, "learning_rate": 5.174141782628278e-06, "loss": 0.764, "step": 14276 }, { "epoch": 0.5042893915377789, "grad_norm": 2.15844988822937, "learning_rate": 5.1735701186450445e-06, "loss": 0.8367, "step": 14277 }, { "epoch": 0.5043247133414868, "grad_norm": 1.847006916999817, "learning_rate": 5.17299845239016e-06, "loss": 0.7922, "step": 14278 }, { "epoch": 0.5043600351451947, "grad_norm": 1.9240230321884155, "learning_rate": 5.1724267838711015e-06, "loss": 0.7869, "step": 14279 }, { "epoch": 0.5043953569489026, "grad_norm": 1.6796724796295166, "learning_rate": 5.171855113095355e-06, "loss": 0.8297, "step": 14280 }, { "epoch": 0.5044306787526105, "grad_norm": 1.5844223499298096, "learning_rate": 5.171283440070403e-06, "loss": 0.7895, "step": 14281 }, { "epoch": 0.5044660005563184, "grad_norm": 1.7127965688705444, "learning_rate": 5.170711764803723e-06, "loss": 0.8018, "step": 14282 }, { "epoch": 0.5045013223600263, "grad_norm": 1.864203691482544, "learning_rate": 5.170140087302801e-06, "loss": 0.772, "step": 14283 }, { "epoch": 0.5045366441637342, "grad_norm": 1.6583877801895142, "learning_rate": 5.169568407575115e-06, "loss": 0.8007, "step": 14284 }, { "epoch": 0.5045719659674421, "grad_norm": 2.3672633171081543, "learning_rate": 5.168996725628152e-06, "loss": 0.7819, "step": 14285 }, { "epoch": 0.50460728777115, "grad_norm": 2.015367031097412, "learning_rate": 5.168425041469392e-06, "loss": 0.8009, "step": 14286 }, { "epoch": 0.504642609574858, "grad_norm": 1.8375235795974731, "learning_rate": 5.167853355106314e-06, "loss": 0.7737, "step": 14287 }, { "epoch": 0.5046779313785659, "grad_norm": 1.8912943601608276, "learning_rate": 5.167281666546405e-06, "loss": 0.7967, "step": 14288 }, { "epoch": 0.5047132531822738, "grad_norm": 1.6707537174224854, "learning_rate": 5.166709975797143e-06, "loss": 0.7472, "step": 14289 }, { "epoch": 0.5047485749859817, "grad_norm": 1.9844635725021362, "learning_rate": 5.166138282866016e-06, "loss": 0.7726, "step": 14290 }, { "epoch": 0.5047838967896896, "grad_norm": 1.5867174863815308, "learning_rate": 5.1655665877605e-06, "loss": 0.7545, "step": 14291 }, { "epoch": 0.5048192185933975, "grad_norm": 1.8404909372329712, "learning_rate": 5.164994890488079e-06, "loss": 0.7775, "step": 14292 }, { "epoch": 0.5048545403971054, "grad_norm": 1.742309331893921, "learning_rate": 5.164423191056236e-06, "loss": 0.7974, "step": 14293 }, { "epoch": 0.5048898622008133, "grad_norm": 1.7280213832855225, "learning_rate": 5.163851489472453e-06, "loss": 0.7796, "step": 14294 }, { "epoch": 0.5049251840045212, "grad_norm": 1.5873754024505615, "learning_rate": 5.163279785744213e-06, "loss": 0.8353, "step": 14295 }, { "epoch": 0.504960505808229, "grad_norm": 2.8674135208129883, "learning_rate": 5.162708079878998e-06, "loss": 0.8334, "step": 14296 }, { "epoch": 0.504995827611937, "grad_norm": 1.9717488288879395, "learning_rate": 5.16213637188429e-06, "loss": 0.7888, "step": 14297 }, { "epoch": 0.5050311494156449, "grad_norm": 1.7343122959136963, "learning_rate": 5.161564661767573e-06, "loss": 0.8257, "step": 14298 }, { "epoch": 0.5050664712193528, "grad_norm": 1.771000862121582, "learning_rate": 5.160992949536327e-06, "loss": 0.8165, "step": 14299 }, { "epoch": 0.5051017930230607, "grad_norm": 1.6087920665740967, "learning_rate": 5.160421235198035e-06, "loss": 0.7804, "step": 14300 }, { "epoch": 0.5051371148267686, "grad_norm": 1.6378686428070068, "learning_rate": 5.159849518760182e-06, "loss": 0.8228, "step": 14301 }, { "epoch": 0.5051724366304765, "grad_norm": 1.8437368869781494, "learning_rate": 5.159277800230245e-06, "loss": 0.7869, "step": 14302 }, { "epoch": 0.5052077584341844, "grad_norm": 2.253009796142578, "learning_rate": 5.158706079615713e-06, "loss": 0.7835, "step": 14303 }, { "epoch": 0.5052430802378923, "grad_norm": 1.7543470859527588, "learning_rate": 5.158134356924065e-06, "loss": 0.7998, "step": 14304 }, { "epoch": 0.5052784020416002, "grad_norm": 1.743340253829956, "learning_rate": 5.157562632162784e-06, "loss": 0.8425, "step": 14305 }, { "epoch": 0.5053137238453081, "grad_norm": 1.5326632261276245, "learning_rate": 5.156990905339353e-06, "loss": 0.8074, "step": 14306 }, { "epoch": 0.5053490456490161, "grad_norm": 1.7238781452178955, "learning_rate": 5.156419176461255e-06, "loss": 0.8063, "step": 14307 }, { "epoch": 0.505384367452724, "grad_norm": 1.703268051147461, "learning_rate": 5.155847445535971e-06, "loss": 0.7785, "step": 14308 }, { "epoch": 0.5054196892564319, "grad_norm": 1.6798855066299438, "learning_rate": 5.155275712570987e-06, "loss": 0.7945, "step": 14309 }, { "epoch": 0.5054550110601398, "grad_norm": 1.7493959665298462, "learning_rate": 5.154703977573782e-06, "loss": 0.7645, "step": 14310 }, { "epoch": 0.5054903328638477, "grad_norm": 1.9665812253952026, "learning_rate": 5.1541322405518415e-06, "loss": 0.8245, "step": 14311 }, { "epoch": 0.5055256546675556, "grad_norm": 1.5250425338745117, "learning_rate": 5.153560501512645e-06, "loss": 0.7907, "step": 14312 }, { "epoch": 0.5055609764712635, "grad_norm": 1.717577576637268, "learning_rate": 5.15298876046368e-06, "loss": 0.7663, "step": 14313 }, { "epoch": 0.5055962982749714, "grad_norm": 1.768012285232544, "learning_rate": 5.152417017412426e-06, "loss": 0.8156, "step": 14314 }, { "epoch": 0.5056316200786793, "grad_norm": 1.9397835731506348, "learning_rate": 5.151845272366366e-06, "loss": 0.8004, "step": 14315 }, { "epoch": 0.5056669418823873, "grad_norm": 1.9152147769927979, "learning_rate": 5.151273525332984e-06, "loss": 0.8143, "step": 14316 }, { "epoch": 0.5057022636860952, "grad_norm": 0.9648075103759766, "learning_rate": 5.150701776319763e-06, "loss": 0.5868, "step": 14317 }, { "epoch": 0.5057375854898031, "grad_norm": 1.675602674484253, "learning_rate": 5.150130025334186e-06, "loss": 0.782, "step": 14318 }, { "epoch": 0.505772907293511, "grad_norm": 1.6446658372879028, "learning_rate": 5.149558272383735e-06, "loss": 0.8201, "step": 14319 }, { "epoch": 0.5058082290972189, "grad_norm": 1.7723971605300903, "learning_rate": 5.148986517475892e-06, "loss": 0.8004, "step": 14320 }, { "epoch": 0.5058435509009268, "grad_norm": 1.71671724319458, "learning_rate": 5.1484147606181425e-06, "loss": 0.7532, "step": 14321 }, { "epoch": 0.5058788727046346, "grad_norm": 1.6202023029327393, "learning_rate": 5.147843001817969e-06, "loss": 0.7755, "step": 14322 }, { "epoch": 0.5059141945083425, "grad_norm": 1.60942804813385, "learning_rate": 5.1472712410828525e-06, "loss": 0.7677, "step": 14323 }, { "epoch": 0.5059495163120504, "grad_norm": 1.5596145391464233, "learning_rate": 5.146699478420279e-06, "loss": 0.8029, "step": 14324 }, { "epoch": 0.5059848381157583, "grad_norm": 1.6625406742095947, "learning_rate": 5.146127713837731e-06, "loss": 0.774, "step": 14325 }, { "epoch": 0.5060201599194663, "grad_norm": 1.9276875257492065, "learning_rate": 5.14555594734269e-06, "loss": 0.7995, "step": 14326 }, { "epoch": 0.5060554817231742, "grad_norm": 1.6332035064697266, "learning_rate": 5.144984178942642e-06, "loss": 0.7968, "step": 14327 }, { "epoch": 0.5060908035268821, "grad_norm": 1.5710567235946655, "learning_rate": 5.144412408645065e-06, "loss": 0.7438, "step": 14328 }, { "epoch": 0.50612612533059, "grad_norm": 1.6176996231079102, "learning_rate": 5.143840636457448e-06, "loss": 0.7593, "step": 14329 }, { "epoch": 0.5061614471342979, "grad_norm": 1.8660365343093872, "learning_rate": 5.1432688623872705e-06, "loss": 0.7939, "step": 14330 }, { "epoch": 0.5061967689380058, "grad_norm": 1.6334819793701172, "learning_rate": 5.142697086442017e-06, "loss": 0.779, "step": 14331 }, { "epoch": 0.5062320907417137, "grad_norm": 1.86369788646698, "learning_rate": 5.142125308629171e-06, "loss": 0.8064, "step": 14332 }, { "epoch": 0.5062674125454216, "grad_norm": 1.5345889329910278, "learning_rate": 5.141553528956217e-06, "loss": 0.7954, "step": 14333 }, { "epoch": 0.5063027343491295, "grad_norm": 1.8230059146881104, "learning_rate": 5.1409817474306345e-06, "loss": 0.8344, "step": 14334 }, { "epoch": 0.5063380561528374, "grad_norm": 1.6134060621261597, "learning_rate": 5.140409964059912e-06, "loss": 0.8114, "step": 14335 }, { "epoch": 0.5063733779565454, "grad_norm": 1.6252731084823608, "learning_rate": 5.1398381788515295e-06, "loss": 0.7999, "step": 14336 }, { "epoch": 0.5064086997602533, "grad_norm": 1.6716569662094116, "learning_rate": 5.139266391812972e-06, "loss": 0.7935, "step": 14337 }, { "epoch": 0.5064440215639612, "grad_norm": 1.8843040466308594, "learning_rate": 5.1386946029517204e-06, "loss": 0.8144, "step": 14338 }, { "epoch": 0.5064793433676691, "grad_norm": 1.6343269348144531, "learning_rate": 5.138122812275259e-06, "loss": 0.8005, "step": 14339 }, { "epoch": 0.506514665171377, "grad_norm": 1.7619540691375732, "learning_rate": 5.137551019791075e-06, "loss": 0.7872, "step": 14340 }, { "epoch": 0.5065499869750849, "grad_norm": 1.9279823303222656, "learning_rate": 5.136979225506647e-06, "loss": 0.8046, "step": 14341 }, { "epoch": 0.5065853087787928, "grad_norm": 1.8256312608718872, "learning_rate": 5.136407429429462e-06, "loss": 0.8206, "step": 14342 }, { "epoch": 0.5066206305825007, "grad_norm": 1.8434324264526367, "learning_rate": 5.135835631567002e-06, "loss": 0.8105, "step": 14343 }, { "epoch": 0.5066559523862086, "grad_norm": 1.7586805820465088, "learning_rate": 5.13526383192675e-06, "loss": 0.7725, "step": 14344 }, { "epoch": 0.5066912741899166, "grad_norm": 1.702581763267517, "learning_rate": 5.134692030516191e-06, "loss": 0.7549, "step": 14345 }, { "epoch": 0.5067265959936245, "grad_norm": 1.5869102478027344, "learning_rate": 5.134120227342809e-06, "loss": 0.7828, "step": 14346 }, { "epoch": 0.5067619177973324, "grad_norm": 1.6761022806167603, "learning_rate": 5.133548422414085e-06, "loss": 0.8086, "step": 14347 }, { "epoch": 0.5067972396010402, "grad_norm": 1.6921831369400024, "learning_rate": 5.132976615737505e-06, "loss": 0.8046, "step": 14348 }, { "epoch": 0.5068325614047481, "grad_norm": 1.7180821895599365, "learning_rate": 5.132404807320552e-06, "loss": 0.7705, "step": 14349 }, { "epoch": 0.506867883208456, "grad_norm": 1.8177369832992554, "learning_rate": 5.131832997170709e-06, "loss": 0.8189, "step": 14350 }, { "epoch": 0.5069032050121639, "grad_norm": 1.8517721891403198, "learning_rate": 5.131261185295462e-06, "loss": 0.8206, "step": 14351 }, { "epoch": 0.5069385268158718, "grad_norm": 1.9581609964370728, "learning_rate": 5.130689371702292e-06, "loss": 0.8393, "step": 14352 }, { "epoch": 0.5069738486195797, "grad_norm": 1.7434699535369873, "learning_rate": 5.130117556398685e-06, "loss": 0.7926, "step": 14353 }, { "epoch": 0.5070091704232876, "grad_norm": 2.1224148273468018, "learning_rate": 5.1295457393921235e-06, "loss": 0.787, "step": 14354 }, { "epoch": 0.5070444922269955, "grad_norm": 1.614099383354187, "learning_rate": 5.128973920690093e-06, "loss": 0.7718, "step": 14355 }, { "epoch": 0.5070798140307035, "grad_norm": 1.8217473030090332, "learning_rate": 5.128402100300074e-06, "loss": 0.8344, "step": 14356 }, { "epoch": 0.5071151358344114, "grad_norm": 1.9882593154907227, "learning_rate": 5.127830278229552e-06, "loss": 0.8034, "step": 14357 }, { "epoch": 0.5071504576381193, "grad_norm": 2.153794050216675, "learning_rate": 5.127258454486013e-06, "loss": 0.7874, "step": 14358 }, { "epoch": 0.5071857794418272, "grad_norm": 1.7388018369674683, "learning_rate": 5.126686629076939e-06, "loss": 0.7755, "step": 14359 }, { "epoch": 0.5072211012455351, "grad_norm": 1.5672705173492432, "learning_rate": 5.126114802009814e-06, "loss": 0.8046, "step": 14360 }, { "epoch": 0.507256423049243, "grad_norm": 1.7948639392852783, "learning_rate": 5.125542973292122e-06, "loss": 0.7783, "step": 14361 }, { "epoch": 0.5072917448529509, "grad_norm": 2.2501649856567383, "learning_rate": 5.124971142931349e-06, "loss": 0.7819, "step": 14362 }, { "epoch": 0.5073270666566588, "grad_norm": 1.7086305618286133, "learning_rate": 5.124399310934974e-06, "loss": 0.7863, "step": 14363 }, { "epoch": 0.5073623884603667, "grad_norm": 1.7605267763137817, "learning_rate": 5.123827477310487e-06, "loss": 0.793, "step": 14364 }, { "epoch": 0.5073977102640747, "grad_norm": 1.735914945602417, "learning_rate": 5.123255642065368e-06, "loss": 0.7816, "step": 14365 }, { "epoch": 0.5074330320677826, "grad_norm": 1.5764704942703247, "learning_rate": 5.122683805207103e-06, "loss": 0.7707, "step": 14366 }, { "epoch": 0.5074683538714905, "grad_norm": 1.657089114189148, "learning_rate": 5.122111966743174e-06, "loss": 0.8099, "step": 14367 }, { "epoch": 0.5075036756751984, "grad_norm": 1.5988210439682007, "learning_rate": 5.121540126681067e-06, "loss": 0.7909, "step": 14368 }, { "epoch": 0.5075389974789063, "grad_norm": 1.6523158550262451, "learning_rate": 5.120968285028266e-06, "loss": 0.8074, "step": 14369 }, { "epoch": 0.5075743192826142, "grad_norm": 1.753024935722351, "learning_rate": 5.120396441792255e-06, "loss": 0.7921, "step": 14370 }, { "epoch": 0.5076096410863221, "grad_norm": 1.670597791671753, "learning_rate": 5.119824596980517e-06, "loss": 0.8026, "step": 14371 }, { "epoch": 0.50764496289003, "grad_norm": 1.655437707901001, "learning_rate": 5.119252750600539e-06, "loss": 0.7807, "step": 14372 }, { "epoch": 0.5076802846937379, "grad_norm": 0.8883463144302368, "learning_rate": 5.1186809026598024e-06, "loss": 0.596, "step": 14373 }, { "epoch": 0.5077156064974457, "grad_norm": 1.718071699142456, "learning_rate": 5.118109053165793e-06, "loss": 0.7803, "step": 14374 }, { "epoch": 0.5077509283011536, "grad_norm": 1.7088723182678223, "learning_rate": 5.117537202125994e-06, "loss": 0.7926, "step": 14375 }, { "epoch": 0.5077862501048616, "grad_norm": 1.7806745767593384, "learning_rate": 5.1169653495478896e-06, "loss": 0.8138, "step": 14376 }, { "epoch": 0.5078215719085695, "grad_norm": 1.840638279914856, "learning_rate": 5.116393495438966e-06, "loss": 0.8044, "step": 14377 }, { "epoch": 0.5078568937122774, "grad_norm": 1.7218666076660156, "learning_rate": 5.115821639806705e-06, "loss": 0.7932, "step": 14378 }, { "epoch": 0.5078922155159853, "grad_norm": 1.8200474977493286, "learning_rate": 5.115249782658592e-06, "loss": 0.8125, "step": 14379 }, { "epoch": 0.5079275373196932, "grad_norm": 2.0469157695770264, "learning_rate": 5.114677924002113e-06, "loss": 0.8429, "step": 14380 }, { "epoch": 0.5079628591234011, "grad_norm": 1.70651376247406, "learning_rate": 5.114106063844749e-06, "loss": 0.8023, "step": 14381 }, { "epoch": 0.507998180927109, "grad_norm": 1.8293349742889404, "learning_rate": 5.113534202193988e-06, "loss": 0.8269, "step": 14382 }, { "epoch": 0.5080335027308169, "grad_norm": 1.7462100982666016, "learning_rate": 5.1129623390573135e-06, "loss": 0.7671, "step": 14383 }, { "epoch": 0.5080688245345248, "grad_norm": 1.516350507736206, "learning_rate": 5.112390474442206e-06, "loss": 0.8016, "step": 14384 }, { "epoch": 0.5081041463382328, "grad_norm": 1.6737995147705078, "learning_rate": 5.111818608356156e-06, "loss": 0.7829, "step": 14385 }, { "epoch": 0.5081394681419407, "grad_norm": 1.5872150659561157, "learning_rate": 5.111246740806642e-06, "loss": 0.7753, "step": 14386 }, { "epoch": 0.5081747899456486, "grad_norm": 1.6538667678833008, "learning_rate": 5.110674871801154e-06, "loss": 0.7794, "step": 14387 }, { "epoch": 0.5082101117493565, "grad_norm": 1.6938047409057617, "learning_rate": 5.110103001347174e-06, "loss": 0.7669, "step": 14388 }, { "epoch": 0.5082454335530644, "grad_norm": 1.7301440238952637, "learning_rate": 5.109531129452186e-06, "loss": 0.814, "step": 14389 }, { "epoch": 0.5082807553567723, "grad_norm": 1.852754831314087, "learning_rate": 5.108959256123675e-06, "loss": 0.8004, "step": 14390 }, { "epoch": 0.5083160771604802, "grad_norm": 1.7283636331558228, "learning_rate": 5.108387381369127e-06, "loss": 0.7995, "step": 14391 }, { "epoch": 0.5083513989641881, "grad_norm": 1.5657354593276978, "learning_rate": 5.107815505196024e-06, "loss": 0.8147, "step": 14392 }, { "epoch": 0.508386720767896, "grad_norm": 0.863082766532898, "learning_rate": 5.107243627611853e-06, "loss": 0.5422, "step": 14393 }, { "epoch": 0.508422042571604, "grad_norm": 1.7588262557983398, "learning_rate": 5.106671748624096e-06, "loss": 0.7656, "step": 14394 }, { "epoch": 0.5084573643753119, "grad_norm": 2.131953001022339, "learning_rate": 5.10609986824024e-06, "loss": 0.816, "step": 14395 }, { "epoch": 0.5084926861790198, "grad_norm": 2.0374574661254883, "learning_rate": 5.10552798646777e-06, "loss": 0.785, "step": 14396 }, { "epoch": 0.5085280079827277, "grad_norm": 1.6506969928741455, "learning_rate": 5.104956103314167e-06, "loss": 0.8099, "step": 14397 }, { "epoch": 0.5085633297864356, "grad_norm": 1.6304521560668945, "learning_rate": 5.10438421878692e-06, "loss": 0.7737, "step": 14398 }, { "epoch": 0.5085986515901435, "grad_norm": 1.9794886112213135, "learning_rate": 5.103812332893512e-06, "loss": 0.7732, "step": 14399 }, { "epoch": 0.5086339733938513, "grad_norm": 1.6660101413726807, "learning_rate": 5.1032404456414265e-06, "loss": 0.842, "step": 14400 }, { "epoch": 0.5086692951975592, "grad_norm": 1.9309985637664795, "learning_rate": 5.102668557038152e-06, "loss": 0.7977, "step": 14401 }, { "epoch": 0.5087046170012671, "grad_norm": 1.7319855690002441, "learning_rate": 5.102096667091168e-06, "loss": 0.7895, "step": 14402 }, { "epoch": 0.508739938804975, "grad_norm": 1.7159931659698486, "learning_rate": 5.1015247758079635e-06, "loss": 0.8291, "step": 14403 }, { "epoch": 0.5087752606086829, "grad_norm": 1.7741913795471191, "learning_rate": 5.100952883196022e-06, "loss": 0.8342, "step": 14404 }, { "epoch": 0.5088105824123909, "grad_norm": 1.5895076990127563, "learning_rate": 5.1003809892628266e-06, "loss": 0.8052, "step": 14405 }, { "epoch": 0.5088459042160988, "grad_norm": 1.6295890808105469, "learning_rate": 5.099809094015865e-06, "loss": 0.8021, "step": 14406 }, { "epoch": 0.5088812260198067, "grad_norm": 1.6205466985702515, "learning_rate": 5.099237197462621e-06, "loss": 0.7514, "step": 14407 }, { "epoch": 0.5089165478235146, "grad_norm": 1.883683443069458, "learning_rate": 5.098665299610578e-06, "loss": 0.7987, "step": 14408 }, { "epoch": 0.5089518696272225, "grad_norm": 1.5433613061904907, "learning_rate": 5.098093400467222e-06, "loss": 0.8033, "step": 14409 }, { "epoch": 0.5089871914309304, "grad_norm": 1.8997153043746948, "learning_rate": 5.097521500040039e-06, "loss": 0.8023, "step": 14410 }, { "epoch": 0.5090225132346383, "grad_norm": 1.7491631507873535, "learning_rate": 5.0969495983365135e-06, "loss": 0.7878, "step": 14411 }, { "epoch": 0.5090578350383462, "grad_norm": 2.9176688194274902, "learning_rate": 5.096377695364129e-06, "loss": 0.7913, "step": 14412 }, { "epoch": 0.5090931568420541, "grad_norm": 1.577577829360962, "learning_rate": 5.09580579113037e-06, "loss": 0.7952, "step": 14413 }, { "epoch": 0.509128478645762, "grad_norm": 2.1609787940979004, "learning_rate": 5.095233885642725e-06, "loss": 0.8493, "step": 14414 }, { "epoch": 0.50916380044947, "grad_norm": 1.5739068984985352, "learning_rate": 5.094661978908676e-06, "loss": 0.8066, "step": 14415 }, { "epoch": 0.5091991222531779, "grad_norm": 1.7744569778442383, "learning_rate": 5.094090070935709e-06, "loss": 0.806, "step": 14416 }, { "epoch": 0.5092344440568858, "grad_norm": 1.7926018238067627, "learning_rate": 5.093518161731309e-06, "loss": 0.7958, "step": 14417 }, { "epoch": 0.5092697658605937, "grad_norm": 1.768225073814392, "learning_rate": 5.092946251302959e-06, "loss": 0.7927, "step": 14418 }, { "epoch": 0.5093050876643016, "grad_norm": 1.6868687868118286, "learning_rate": 5.09237433965815e-06, "loss": 0.7869, "step": 14419 }, { "epoch": 0.5093404094680095, "grad_norm": 1.6810494661331177, "learning_rate": 5.091802426804359e-06, "loss": 0.8394, "step": 14420 }, { "epoch": 0.5093757312717174, "grad_norm": 1.699340581893921, "learning_rate": 5.091230512749077e-06, "loss": 0.8011, "step": 14421 }, { "epoch": 0.5094110530754253, "grad_norm": 1.9130330085754395, "learning_rate": 5.0906585974997866e-06, "loss": 0.8026, "step": 14422 }, { "epoch": 0.5094463748791332, "grad_norm": 1.8136452436447144, "learning_rate": 5.090086681063972e-06, "loss": 0.7966, "step": 14423 }, { "epoch": 0.5094816966828412, "grad_norm": 0.9471107125282288, "learning_rate": 5.089514763449123e-06, "loss": 0.5802, "step": 14424 }, { "epoch": 0.5095170184865491, "grad_norm": 0.9078757762908936, "learning_rate": 5.088942844662721e-06, "loss": 0.5999, "step": 14425 }, { "epoch": 0.5095523402902569, "grad_norm": 2.46175217628479, "learning_rate": 5.088370924712249e-06, "loss": 0.7599, "step": 14426 }, { "epoch": 0.5095876620939648, "grad_norm": 1.5867820978164673, "learning_rate": 5.087799003605198e-06, "loss": 0.7644, "step": 14427 }, { "epoch": 0.5096229838976727, "grad_norm": 1.660742998123169, "learning_rate": 5.087227081349048e-06, "loss": 0.8306, "step": 14428 }, { "epoch": 0.5096583057013806, "grad_norm": 2.465763568878174, "learning_rate": 5.086655157951289e-06, "loss": 0.7903, "step": 14429 }, { "epoch": 0.5096936275050885, "grad_norm": 1.8576828241348267, "learning_rate": 5.086083233419402e-06, "loss": 0.8172, "step": 14430 }, { "epoch": 0.5097289493087964, "grad_norm": 1.736913800239563, "learning_rate": 5.085511307760873e-06, "loss": 0.7962, "step": 14431 }, { "epoch": 0.5097642711125043, "grad_norm": 1.8130791187286377, "learning_rate": 5.08493938098319e-06, "loss": 0.8244, "step": 14432 }, { "epoch": 0.5097995929162122, "grad_norm": 1.8085330724716187, "learning_rate": 5.084367453093835e-06, "loss": 0.8256, "step": 14433 }, { "epoch": 0.5098349147199202, "grad_norm": 1.5784380435943604, "learning_rate": 5.0837955241002936e-06, "loss": 0.8122, "step": 14434 }, { "epoch": 0.5098702365236281, "grad_norm": 3.0545082092285156, "learning_rate": 5.083223594010054e-06, "loss": 0.7877, "step": 14435 }, { "epoch": 0.509905558327336, "grad_norm": 1.8076049089431763, "learning_rate": 5.082651662830599e-06, "loss": 0.8309, "step": 14436 }, { "epoch": 0.5099408801310439, "grad_norm": 1.6373155117034912, "learning_rate": 5.082079730569414e-06, "loss": 0.8007, "step": 14437 }, { "epoch": 0.5099762019347518, "grad_norm": 1.693190574645996, "learning_rate": 5.081507797233987e-06, "loss": 0.8094, "step": 14438 }, { "epoch": 0.5100115237384597, "grad_norm": 1.573835015296936, "learning_rate": 5.080935862831798e-06, "loss": 0.7914, "step": 14439 }, { "epoch": 0.5100468455421676, "grad_norm": 1.5681864023208618, "learning_rate": 5.080363927370339e-06, "loss": 0.8151, "step": 14440 }, { "epoch": 0.5100821673458755, "grad_norm": 1.4230413436889648, "learning_rate": 5.079791990857091e-06, "loss": 0.778, "step": 14441 }, { "epoch": 0.5101174891495834, "grad_norm": 1.6387852430343628, "learning_rate": 5.079220053299538e-06, "loss": 0.8019, "step": 14442 }, { "epoch": 0.5101528109532913, "grad_norm": 1.7143213748931885, "learning_rate": 5.07864811470517e-06, "loss": 0.8235, "step": 14443 }, { "epoch": 0.5101881327569993, "grad_norm": 0.986690878868103, "learning_rate": 5.078076175081469e-06, "loss": 0.5845, "step": 14444 }, { "epoch": 0.5102234545607072, "grad_norm": 1.7063652276992798, "learning_rate": 5.077504234435923e-06, "loss": 0.8588, "step": 14445 }, { "epoch": 0.5102587763644151, "grad_norm": 1.716249704360962, "learning_rate": 5.076932292776017e-06, "loss": 0.7692, "step": 14446 }, { "epoch": 0.510294098168123, "grad_norm": 1.7301087379455566, "learning_rate": 5.076360350109234e-06, "loss": 0.7618, "step": 14447 }, { "epoch": 0.5103294199718309, "grad_norm": 1.680946707725525, "learning_rate": 5.075788406443062e-06, "loss": 0.8068, "step": 14448 }, { "epoch": 0.5103647417755388, "grad_norm": 1.6524858474731445, "learning_rate": 5.075216461784984e-06, "loss": 0.7642, "step": 14449 }, { "epoch": 0.5104000635792467, "grad_norm": 1.5098828077316284, "learning_rate": 5.0746445161424885e-06, "loss": 0.7933, "step": 14450 }, { "epoch": 0.5104353853829546, "grad_norm": 1.6134339570999146, "learning_rate": 5.07407256952306e-06, "loss": 0.7858, "step": 14451 }, { "epoch": 0.5104707071866625, "grad_norm": 1.7462238073349, "learning_rate": 5.073500621934182e-06, "loss": 0.8008, "step": 14452 }, { "epoch": 0.5105060289903703, "grad_norm": 1.7990314960479736, "learning_rate": 5.072928673383343e-06, "loss": 0.7597, "step": 14453 }, { "epoch": 0.5105413507940783, "grad_norm": 1.7465794086456299, "learning_rate": 5.072356723878027e-06, "loss": 0.7866, "step": 14454 }, { "epoch": 0.5105766725977862, "grad_norm": 1.7232359647750854, "learning_rate": 5.071784773425719e-06, "loss": 0.8031, "step": 14455 }, { "epoch": 0.5106119944014941, "grad_norm": 2.0172276496887207, "learning_rate": 5.0712128220339075e-06, "loss": 0.8007, "step": 14456 }, { "epoch": 0.510647316205202, "grad_norm": 2.129610776901245, "learning_rate": 5.070640869710073e-06, "loss": 0.8274, "step": 14457 }, { "epoch": 0.5106826380089099, "grad_norm": 1.9398430585861206, "learning_rate": 5.070068916461705e-06, "loss": 0.8068, "step": 14458 }, { "epoch": 0.5107179598126178, "grad_norm": 2.345848560333252, "learning_rate": 5.06949696229629e-06, "loss": 0.834, "step": 14459 }, { "epoch": 0.5107532816163257, "grad_norm": 1.6371591091156006, "learning_rate": 5.068925007221309e-06, "loss": 0.7811, "step": 14460 }, { "epoch": 0.5107886034200336, "grad_norm": 1.7698365449905396, "learning_rate": 5.0683530512442535e-06, "loss": 0.8089, "step": 14461 }, { "epoch": 0.5108239252237415, "grad_norm": 0.8770014643669128, "learning_rate": 5.067781094372605e-06, "loss": 0.5941, "step": 14462 }, { "epoch": 0.5108592470274494, "grad_norm": 2.020183563232422, "learning_rate": 5.067209136613849e-06, "loss": 0.7701, "step": 14463 }, { "epoch": 0.5108945688311574, "grad_norm": 1.6748204231262207, "learning_rate": 5.066637177975473e-06, "loss": 0.8026, "step": 14464 }, { "epoch": 0.5109298906348653, "grad_norm": 2.127199172973633, "learning_rate": 5.066065218464963e-06, "loss": 0.7847, "step": 14465 }, { "epoch": 0.5109652124385732, "grad_norm": 1.7383259534835815, "learning_rate": 5.065493258089804e-06, "loss": 0.7933, "step": 14466 }, { "epoch": 0.5110005342422811, "grad_norm": 1.5951436758041382, "learning_rate": 5.064921296857481e-06, "loss": 0.7942, "step": 14467 }, { "epoch": 0.511035856045989, "grad_norm": 2.662250518798828, "learning_rate": 5.064349334775479e-06, "loss": 0.7857, "step": 14468 }, { "epoch": 0.5110711778496969, "grad_norm": 1.6715586185455322, "learning_rate": 5.0637773718512875e-06, "loss": 0.8177, "step": 14469 }, { "epoch": 0.5111064996534048, "grad_norm": 1.6975698471069336, "learning_rate": 5.063205408092388e-06, "loss": 0.7897, "step": 14470 }, { "epoch": 0.5111418214571127, "grad_norm": 2.0040760040283203, "learning_rate": 5.062633443506267e-06, "loss": 0.7926, "step": 14471 }, { "epoch": 0.5111771432608206, "grad_norm": 1.5018376111984253, "learning_rate": 5.062061478100414e-06, "loss": 0.7597, "step": 14472 }, { "epoch": 0.5112124650645286, "grad_norm": 1.5709301233291626, "learning_rate": 5.06148951188231e-06, "loss": 0.7816, "step": 14473 }, { "epoch": 0.5112477868682365, "grad_norm": 1.8865025043487549, "learning_rate": 5.060917544859444e-06, "loss": 0.8207, "step": 14474 }, { "epoch": 0.5112831086719444, "grad_norm": 1.6804850101470947, "learning_rate": 5.060345577039301e-06, "loss": 0.8059, "step": 14475 }, { "epoch": 0.5113184304756523, "grad_norm": 1.7448197603225708, "learning_rate": 5.059773608429365e-06, "loss": 0.7949, "step": 14476 }, { "epoch": 0.5113537522793602, "grad_norm": 1.8079063892364502, "learning_rate": 5.059201639037123e-06, "loss": 0.8043, "step": 14477 }, { "epoch": 0.5113890740830681, "grad_norm": 1.7005161046981812, "learning_rate": 5.058629668870062e-06, "loss": 0.8207, "step": 14478 }, { "epoch": 0.5114243958867759, "grad_norm": 1.6238123178482056, "learning_rate": 5.058057697935667e-06, "loss": 0.8203, "step": 14479 }, { "epoch": 0.5114597176904838, "grad_norm": 1.668747901916504, "learning_rate": 5.057485726241424e-06, "loss": 0.735, "step": 14480 }, { "epoch": 0.5114950394941917, "grad_norm": 1.6092432737350464, "learning_rate": 5.056913753794818e-06, "loss": 0.7791, "step": 14481 }, { "epoch": 0.5115303612978996, "grad_norm": 1.6930387020111084, "learning_rate": 5.056341780603335e-06, "loss": 0.7748, "step": 14482 }, { "epoch": 0.5115656831016075, "grad_norm": 1.6118505001068115, "learning_rate": 5.0557698066744634e-06, "loss": 0.7872, "step": 14483 }, { "epoch": 0.5116010049053155, "grad_norm": 1.7662221193313599, "learning_rate": 5.0551978320156854e-06, "loss": 0.7853, "step": 14484 }, { "epoch": 0.5116363267090234, "grad_norm": 1.6491295099258423, "learning_rate": 5.05462585663449e-06, "loss": 0.8614, "step": 14485 }, { "epoch": 0.5116716485127313, "grad_norm": 1.7429825067520142, "learning_rate": 5.054053880538359e-06, "loss": 0.7661, "step": 14486 }, { "epoch": 0.5117069703164392, "grad_norm": 1.8352375030517578, "learning_rate": 5.0534819037347825e-06, "loss": 0.7856, "step": 14487 }, { "epoch": 0.5117422921201471, "grad_norm": 1.7438102960586548, "learning_rate": 5.052909926231245e-06, "loss": 0.839, "step": 14488 }, { "epoch": 0.511777613923855, "grad_norm": 1.823033094406128, "learning_rate": 5.052337948035232e-06, "loss": 0.8105, "step": 14489 }, { "epoch": 0.5118129357275629, "grad_norm": 1.883733868598938, "learning_rate": 5.0517659691542305e-06, "loss": 0.8098, "step": 14490 }, { "epoch": 0.5118482575312708, "grad_norm": 1.8050321340560913, "learning_rate": 5.051193989595725e-06, "loss": 0.8046, "step": 14491 }, { "epoch": 0.5118835793349787, "grad_norm": 1.908110499382019, "learning_rate": 5.050622009367202e-06, "loss": 0.7918, "step": 14492 }, { "epoch": 0.5119189011386867, "grad_norm": 1.5696489810943604, "learning_rate": 5.05005002847615e-06, "loss": 0.7735, "step": 14493 }, { "epoch": 0.5119542229423946, "grad_norm": 1.5957190990447998, "learning_rate": 5.04947804693005e-06, "loss": 0.7928, "step": 14494 }, { "epoch": 0.5119895447461025, "grad_norm": 1.6425528526306152, "learning_rate": 5.048906064736391e-06, "loss": 0.7647, "step": 14495 }, { "epoch": 0.5120248665498104, "grad_norm": 1.794528841972351, "learning_rate": 5.048334081902659e-06, "loss": 0.7845, "step": 14496 }, { "epoch": 0.5120601883535183, "grad_norm": 1.902506709098816, "learning_rate": 5.047762098436339e-06, "loss": 0.8132, "step": 14497 }, { "epoch": 0.5120955101572262, "grad_norm": 1.7570990324020386, "learning_rate": 5.047190114344919e-06, "loss": 0.7653, "step": 14498 }, { "epoch": 0.5121308319609341, "grad_norm": 1.5940877199172974, "learning_rate": 5.046618129635883e-06, "loss": 0.7731, "step": 14499 }, { "epoch": 0.512166153764642, "grad_norm": 1.738063931465149, "learning_rate": 5.046046144316716e-06, "loss": 0.8248, "step": 14500 }, { "epoch": 0.5122014755683499, "grad_norm": 1.7722384929656982, "learning_rate": 5.045474158394909e-06, "loss": 0.8032, "step": 14501 }, { "epoch": 0.5122367973720579, "grad_norm": 1.8300390243530273, "learning_rate": 5.044902171877941e-06, "loss": 0.8063, "step": 14502 }, { "epoch": 0.5122721191757658, "grad_norm": 1.761591911315918, "learning_rate": 5.044330184773306e-06, "loss": 0.8117, "step": 14503 }, { "epoch": 0.5123074409794737, "grad_norm": 1.589214563369751, "learning_rate": 5.043758197088484e-06, "loss": 0.7627, "step": 14504 }, { "epoch": 0.5123427627831815, "grad_norm": 1.7115243673324585, "learning_rate": 5.0431862088309616e-06, "loss": 0.7815, "step": 14505 }, { "epoch": 0.5123780845868894, "grad_norm": 1.7755721807479858, "learning_rate": 5.042614220008227e-06, "loss": 0.8307, "step": 14506 }, { "epoch": 0.5124134063905973, "grad_norm": 1.7542915344238281, "learning_rate": 5.042042230627766e-06, "loss": 0.7925, "step": 14507 }, { "epoch": 0.5124487281943052, "grad_norm": 1.6833322048187256, "learning_rate": 5.041470240697064e-06, "loss": 0.806, "step": 14508 }, { "epoch": 0.5124840499980131, "grad_norm": 1.6777775287628174, "learning_rate": 5.040898250223608e-06, "loss": 0.8114, "step": 14509 }, { "epoch": 0.512519371801721, "grad_norm": 1.7070446014404297, "learning_rate": 5.040326259214882e-06, "loss": 0.7608, "step": 14510 }, { "epoch": 0.5125546936054289, "grad_norm": 1.813446044921875, "learning_rate": 5.039754267678374e-06, "loss": 0.7621, "step": 14511 }, { "epoch": 0.5125900154091368, "grad_norm": 2.0549817085266113, "learning_rate": 5.03918227562157e-06, "loss": 0.7905, "step": 14512 }, { "epoch": 0.5126253372128448, "grad_norm": 1.7020013332366943, "learning_rate": 5.038610283051956e-06, "loss": 0.7668, "step": 14513 }, { "epoch": 0.5126606590165527, "grad_norm": 1.8415606021881104, "learning_rate": 5.038038289977017e-06, "loss": 0.7756, "step": 14514 }, { "epoch": 0.5126959808202606, "grad_norm": 1.7405436038970947, "learning_rate": 5.03746629640424e-06, "loss": 0.8308, "step": 14515 }, { "epoch": 0.5127313026239685, "grad_norm": 2.4931182861328125, "learning_rate": 5.036894302341112e-06, "loss": 0.8292, "step": 14516 }, { "epoch": 0.5127666244276764, "grad_norm": 1.6425100564956665, "learning_rate": 5.036322307795118e-06, "loss": 0.8525, "step": 14517 }, { "epoch": 0.5128019462313843, "grad_norm": 1.8889771699905396, "learning_rate": 5.035750312773744e-06, "loss": 0.8045, "step": 14518 }, { "epoch": 0.5128372680350922, "grad_norm": 1.838716745376587, "learning_rate": 5.035178317284477e-06, "loss": 0.7564, "step": 14519 }, { "epoch": 0.5128725898388001, "grad_norm": 1.7197024822235107, "learning_rate": 5.034606321334803e-06, "loss": 0.7723, "step": 14520 }, { "epoch": 0.512907911642508, "grad_norm": 1.7199863195419312, "learning_rate": 5.034034324932208e-06, "loss": 0.8091, "step": 14521 }, { "epoch": 0.512943233446216, "grad_norm": 1.6418763399124146, "learning_rate": 5.033462328084178e-06, "loss": 0.7893, "step": 14522 }, { "epoch": 0.5129785552499239, "grad_norm": 1.5712549686431885, "learning_rate": 5.032890330798199e-06, "loss": 0.7676, "step": 14523 }, { "epoch": 0.5130138770536318, "grad_norm": 1.6065781116485596, "learning_rate": 5.032318333081757e-06, "loss": 0.7832, "step": 14524 }, { "epoch": 0.5130491988573397, "grad_norm": 1.7942273616790771, "learning_rate": 5.0317463349423404e-06, "loss": 0.8, "step": 14525 }, { "epoch": 0.5130845206610476, "grad_norm": 1.5622458457946777, "learning_rate": 5.031174336387432e-06, "loss": 0.7898, "step": 14526 }, { "epoch": 0.5131198424647555, "grad_norm": 1.5140583515167236, "learning_rate": 5.03060233742452e-06, "loss": 0.7629, "step": 14527 }, { "epoch": 0.5131551642684634, "grad_norm": 1.6910690069198608, "learning_rate": 5.030030338061093e-06, "loss": 0.7663, "step": 14528 }, { "epoch": 0.5131904860721713, "grad_norm": 1.5065841674804688, "learning_rate": 5.0294583383046305e-06, "loss": 0.7653, "step": 14529 }, { "epoch": 0.5132258078758792, "grad_norm": 1.752388834953308, "learning_rate": 5.0288863381626265e-06, "loss": 0.8062, "step": 14530 }, { "epoch": 0.513261129679587, "grad_norm": 1.8852585554122925, "learning_rate": 5.028314337642561e-06, "loss": 0.786, "step": 14531 }, { "epoch": 0.513296451483295, "grad_norm": 1.5802892446517944, "learning_rate": 5.027742336751924e-06, "loss": 0.8117, "step": 14532 }, { "epoch": 0.5133317732870029, "grad_norm": 1.645582675933838, "learning_rate": 5.027170335498201e-06, "loss": 0.7707, "step": 14533 }, { "epoch": 0.5133670950907108, "grad_norm": 2.2297873497009277, "learning_rate": 5.026598333888876e-06, "loss": 0.8156, "step": 14534 }, { "epoch": 0.5134024168944187, "grad_norm": 1.6305391788482666, "learning_rate": 5.026026331931437e-06, "loss": 0.7891, "step": 14535 }, { "epoch": 0.5134377386981266, "grad_norm": 1.6384252309799194, "learning_rate": 5.025454329633372e-06, "loss": 0.7917, "step": 14536 }, { "epoch": 0.5134730605018345, "grad_norm": 1.8324730396270752, "learning_rate": 5.024882327002164e-06, "loss": 0.8383, "step": 14537 }, { "epoch": 0.5135083823055424, "grad_norm": 1.7899094820022583, "learning_rate": 5.024310324045302e-06, "loss": 0.7738, "step": 14538 }, { "epoch": 0.5135437041092503, "grad_norm": 1.9655413627624512, "learning_rate": 5.0237383207702705e-06, "loss": 0.7783, "step": 14539 }, { "epoch": 0.5135790259129582, "grad_norm": 1.6700093746185303, "learning_rate": 5.023166317184557e-06, "loss": 0.7758, "step": 14540 }, { "epoch": 0.5136143477166661, "grad_norm": 2.3250346183776855, "learning_rate": 5.022594313295646e-06, "loss": 0.8048, "step": 14541 }, { "epoch": 0.513649669520374, "grad_norm": 1.6095348596572876, "learning_rate": 5.0220223091110256e-06, "loss": 0.7642, "step": 14542 }, { "epoch": 0.513684991324082, "grad_norm": 1.696997880935669, "learning_rate": 5.02145030463818e-06, "loss": 0.7983, "step": 14543 }, { "epoch": 0.5137203131277899, "grad_norm": 1.6707711219787598, "learning_rate": 5.020878299884597e-06, "loss": 0.8217, "step": 14544 }, { "epoch": 0.5137556349314978, "grad_norm": 0.9748136401176453, "learning_rate": 5.020306294857764e-06, "loss": 0.569, "step": 14545 }, { "epoch": 0.5137909567352057, "grad_norm": 1.887418508529663, "learning_rate": 5.019734289565167e-06, "loss": 0.813, "step": 14546 }, { "epoch": 0.5138262785389136, "grad_norm": 1.6195855140686035, "learning_rate": 5.019162284014289e-06, "loss": 0.7792, "step": 14547 }, { "epoch": 0.5138616003426215, "grad_norm": 1.6847333908081055, "learning_rate": 5.01859027821262e-06, "loss": 0.7803, "step": 14548 }, { "epoch": 0.5138969221463294, "grad_norm": 1.767886757850647, "learning_rate": 5.018018272167644e-06, "loss": 0.8062, "step": 14549 }, { "epoch": 0.5139322439500373, "grad_norm": 1.905967354774475, "learning_rate": 5.0174462658868495e-06, "loss": 0.8301, "step": 14550 }, { "epoch": 0.5139675657537452, "grad_norm": 1.6455609798431396, "learning_rate": 5.016874259377721e-06, "loss": 0.7859, "step": 14551 }, { "epoch": 0.5140028875574532, "grad_norm": 2.015310287475586, "learning_rate": 5.016302252647745e-06, "loss": 0.7891, "step": 14552 }, { "epoch": 0.5140382093611611, "grad_norm": 1.970056414604187, "learning_rate": 5.015730245704409e-06, "loss": 0.761, "step": 14553 }, { "epoch": 0.514073531164869, "grad_norm": 1.6601351499557495, "learning_rate": 5.015158238555198e-06, "loss": 0.7925, "step": 14554 }, { "epoch": 0.5141088529685769, "grad_norm": 1.7152949571609497, "learning_rate": 5.0145862312075976e-06, "loss": 0.7987, "step": 14555 }, { "epoch": 0.5141441747722848, "grad_norm": 1.5511761903762817, "learning_rate": 5.014014223669097e-06, "loss": 0.7867, "step": 14556 }, { "epoch": 0.5141794965759926, "grad_norm": 1.4942454099655151, "learning_rate": 5.013442215947181e-06, "loss": 0.7494, "step": 14557 }, { "epoch": 0.5142148183797005, "grad_norm": 1.6227726936340332, "learning_rate": 5.012870208049335e-06, "loss": 0.791, "step": 14558 }, { "epoch": 0.5142501401834084, "grad_norm": 1.6990599632263184, "learning_rate": 5.012298199983046e-06, "loss": 0.7966, "step": 14559 }, { "epoch": 0.5142854619871163, "grad_norm": 1.7313448190689087, "learning_rate": 5.0117261917558e-06, "loss": 0.772, "step": 14560 }, { "epoch": 0.5143207837908242, "grad_norm": 1.7056853771209717, "learning_rate": 5.0111541833750856e-06, "loss": 0.7999, "step": 14561 }, { "epoch": 0.5143561055945322, "grad_norm": 1.6531380414962769, "learning_rate": 5.010582174848386e-06, "loss": 0.8103, "step": 14562 }, { "epoch": 0.5143914273982401, "grad_norm": 2.86146879196167, "learning_rate": 5.010010166183189e-06, "loss": 0.8013, "step": 14563 }, { "epoch": 0.514426749201948, "grad_norm": 1.6619646549224854, "learning_rate": 5.009438157386981e-06, "loss": 0.778, "step": 14564 }, { "epoch": 0.5144620710056559, "grad_norm": 1.6937614679336548, "learning_rate": 5.008866148467247e-06, "loss": 0.8126, "step": 14565 }, { "epoch": 0.5144973928093638, "grad_norm": 1.6276276111602783, "learning_rate": 5.008294139431477e-06, "loss": 0.794, "step": 14566 }, { "epoch": 0.5145327146130717, "grad_norm": 1.669463872909546, "learning_rate": 5.0077221302871534e-06, "loss": 0.7794, "step": 14567 }, { "epoch": 0.5145680364167796, "grad_norm": 1.7319716215133667, "learning_rate": 5.007150121041764e-06, "loss": 0.773, "step": 14568 }, { "epoch": 0.5146033582204875, "grad_norm": 1.669758915901184, "learning_rate": 5.0065781117027955e-06, "loss": 0.7895, "step": 14569 }, { "epoch": 0.5146386800241954, "grad_norm": 1.616729974746704, "learning_rate": 5.006006102277733e-06, "loss": 0.7508, "step": 14570 }, { "epoch": 0.5146740018279033, "grad_norm": 1.784433364868164, "learning_rate": 5.005434092774065e-06, "loss": 0.7728, "step": 14571 }, { "epoch": 0.5147093236316113, "grad_norm": 1.745969533920288, "learning_rate": 5.004862083199276e-06, "loss": 0.8225, "step": 14572 }, { "epoch": 0.5147446454353192, "grad_norm": 1.62075936794281, "learning_rate": 5.004290073560852e-06, "loss": 0.7961, "step": 14573 }, { "epoch": 0.5147799672390271, "grad_norm": 1.8592138290405273, "learning_rate": 5.003718063866282e-06, "loss": 0.8012, "step": 14574 }, { "epoch": 0.514815289042735, "grad_norm": 2.3846609592437744, "learning_rate": 5.00314605412305e-06, "loss": 0.8243, "step": 14575 }, { "epoch": 0.5148506108464429, "grad_norm": 1.6684143543243408, "learning_rate": 5.002574044338644e-06, "loss": 0.78, "step": 14576 }, { "epoch": 0.5148859326501508, "grad_norm": 1.7275583744049072, "learning_rate": 5.002002034520549e-06, "loss": 0.7592, "step": 14577 }, { "epoch": 0.5149212544538587, "grad_norm": 1.726393222808838, "learning_rate": 5.0014300246762505e-06, "loss": 0.7816, "step": 14578 }, { "epoch": 0.5149565762575666, "grad_norm": 1.55092453956604, "learning_rate": 5.0008580148132365e-06, "loss": 0.7799, "step": 14579 }, { "epoch": 0.5149918980612745, "grad_norm": 1.9175126552581787, "learning_rate": 5.000286004938994e-06, "loss": 0.7995, "step": 14580 }, { "epoch": 0.5150272198649825, "grad_norm": 1.7024562358856201, "learning_rate": 4.999713995061008e-06, "loss": 0.787, "step": 14581 }, { "epoch": 0.5150625416686904, "grad_norm": 1.5204548835754395, "learning_rate": 4.999141985186766e-06, "loss": 0.7836, "step": 14582 }, { "epoch": 0.5150978634723982, "grad_norm": 0.912628173828125, "learning_rate": 4.998569975323751e-06, "loss": 0.5915, "step": 14583 }, { "epoch": 0.5151331852761061, "grad_norm": 2.0711910724639893, "learning_rate": 4.997997965479453e-06, "loss": 0.8247, "step": 14584 }, { "epoch": 0.515168507079814, "grad_norm": 1.9081059694290161, "learning_rate": 4.997425955661358e-06, "loss": 0.7736, "step": 14585 }, { "epoch": 0.5152038288835219, "grad_norm": 1.8236809968948364, "learning_rate": 4.996853945876951e-06, "loss": 0.7637, "step": 14586 }, { "epoch": 0.5152391506872298, "grad_norm": 1.7692054510116577, "learning_rate": 4.996281936133719e-06, "loss": 0.784, "step": 14587 }, { "epoch": 0.5152744724909377, "grad_norm": 1.6595879793167114, "learning_rate": 4.9957099264391486e-06, "loss": 0.8112, "step": 14588 }, { "epoch": 0.5153097942946456, "grad_norm": 1.9218345880508423, "learning_rate": 4.995137916800725e-06, "loss": 0.8197, "step": 14589 }, { "epoch": 0.5153451160983535, "grad_norm": 1.6238375902175903, "learning_rate": 4.994565907225936e-06, "loss": 0.7869, "step": 14590 }, { "epoch": 0.5153804379020615, "grad_norm": 0.9308726191520691, "learning_rate": 4.9939938977222685e-06, "loss": 0.567, "step": 14591 }, { "epoch": 0.5154157597057694, "grad_norm": 2.2940142154693604, "learning_rate": 4.993421888297205e-06, "loss": 0.8044, "step": 14592 }, { "epoch": 0.5154510815094773, "grad_norm": 1.8562005758285522, "learning_rate": 4.992849878958237e-06, "loss": 0.7913, "step": 14593 }, { "epoch": 0.5154864033131852, "grad_norm": 1.7795512676239014, "learning_rate": 4.992277869712847e-06, "loss": 0.8388, "step": 14594 }, { "epoch": 0.5155217251168931, "grad_norm": 1.563247561454773, "learning_rate": 4.991705860568524e-06, "loss": 0.779, "step": 14595 }, { "epoch": 0.515557046920601, "grad_norm": 1.6153925657272339, "learning_rate": 4.991133851532753e-06, "loss": 0.7852, "step": 14596 }, { "epoch": 0.5155923687243089, "grad_norm": 1.6617079973220825, "learning_rate": 4.99056184261302e-06, "loss": 0.8021, "step": 14597 }, { "epoch": 0.5156276905280168, "grad_norm": 1.559457540512085, "learning_rate": 4.989989833816812e-06, "loss": 0.7797, "step": 14598 }, { "epoch": 0.5156630123317247, "grad_norm": 1.692650318145752, "learning_rate": 4.989417825151616e-06, "loss": 0.7872, "step": 14599 }, { "epoch": 0.5156983341354326, "grad_norm": 0.8820961713790894, "learning_rate": 4.988845816624918e-06, "loss": 0.562, "step": 14600 }, { "epoch": 0.5157336559391406, "grad_norm": 1.7434451580047607, "learning_rate": 4.988273808244202e-06, "loss": 0.8042, "step": 14601 }, { "epoch": 0.5157689777428485, "grad_norm": 1.716495394706726, "learning_rate": 4.987701800016956e-06, "loss": 0.7856, "step": 14602 }, { "epoch": 0.5158042995465564, "grad_norm": 1.6881839036941528, "learning_rate": 4.987129791950668e-06, "loss": 0.7749, "step": 14603 }, { "epoch": 0.5158396213502643, "grad_norm": 1.5806771516799927, "learning_rate": 4.986557784052821e-06, "loss": 0.8211, "step": 14604 }, { "epoch": 0.5158749431539722, "grad_norm": 1.5463263988494873, "learning_rate": 4.985985776330905e-06, "loss": 0.7685, "step": 14605 }, { "epoch": 0.5159102649576801, "grad_norm": 1.513663649559021, "learning_rate": 4.985413768792404e-06, "loss": 0.8168, "step": 14606 }, { "epoch": 0.515945586761388, "grad_norm": 1.8072724342346191, "learning_rate": 4.9848417614448036e-06, "loss": 0.787, "step": 14607 }, { "epoch": 0.5159809085650959, "grad_norm": 1.7393712997436523, "learning_rate": 4.984269754295593e-06, "loss": 0.796, "step": 14608 }, { "epoch": 0.5160162303688037, "grad_norm": 1.6890203952789307, "learning_rate": 4.983697747352257e-06, "loss": 0.7836, "step": 14609 }, { "epoch": 0.5160515521725116, "grad_norm": 1.6597228050231934, "learning_rate": 4.98312574062228e-06, "loss": 0.8079, "step": 14610 }, { "epoch": 0.5160868739762196, "grad_norm": 1.6300079822540283, "learning_rate": 4.982553734113152e-06, "loss": 0.8198, "step": 14611 }, { "epoch": 0.5161221957799275, "grad_norm": 1.7159782648086548, "learning_rate": 4.981981727832357e-06, "loss": 0.8144, "step": 14612 }, { "epoch": 0.5161575175836354, "grad_norm": 2.076181650161743, "learning_rate": 4.981409721787381e-06, "loss": 0.7756, "step": 14613 }, { "epoch": 0.5161928393873433, "grad_norm": 1.7636851072311401, "learning_rate": 4.980837715985711e-06, "loss": 0.7568, "step": 14614 }, { "epoch": 0.5162281611910512, "grad_norm": 1.7010153532028198, "learning_rate": 4.980265710434835e-06, "loss": 0.8076, "step": 14615 }, { "epoch": 0.5162634829947591, "grad_norm": 1.5597478151321411, "learning_rate": 4.979693705142236e-06, "loss": 0.7805, "step": 14616 }, { "epoch": 0.516298804798467, "grad_norm": 1.658571481704712, "learning_rate": 4.979121700115402e-06, "loss": 0.7648, "step": 14617 }, { "epoch": 0.5163341266021749, "grad_norm": 1.5970678329467773, "learning_rate": 4.978549695361821e-06, "loss": 0.7616, "step": 14618 }, { "epoch": 0.5163694484058828, "grad_norm": 1.6405034065246582, "learning_rate": 4.977977690888978e-06, "loss": 0.7825, "step": 14619 }, { "epoch": 0.5164047702095907, "grad_norm": 1.5866132974624634, "learning_rate": 4.977405686704357e-06, "loss": 0.7707, "step": 14620 }, { "epoch": 0.5164400920132987, "grad_norm": 1.7406563758850098, "learning_rate": 4.976833682815446e-06, "loss": 0.827, "step": 14621 }, { "epoch": 0.5164754138170066, "grad_norm": 1.6288868188858032, "learning_rate": 4.976261679229732e-06, "loss": 0.7908, "step": 14622 }, { "epoch": 0.5165107356207145, "grad_norm": 1.5864667892456055, "learning_rate": 4.9756896759547e-06, "loss": 0.8097, "step": 14623 }, { "epoch": 0.5165460574244224, "grad_norm": 1.4792197942733765, "learning_rate": 4.975117672997838e-06, "loss": 0.7854, "step": 14624 }, { "epoch": 0.5165813792281303, "grad_norm": 1.7655248641967773, "learning_rate": 4.974545670366631e-06, "loss": 0.8286, "step": 14625 }, { "epoch": 0.5166167010318382, "grad_norm": 2.312570333480835, "learning_rate": 4.973973668068564e-06, "loss": 0.789, "step": 14626 }, { "epoch": 0.5166520228355461, "grad_norm": 1.844232439994812, "learning_rate": 4.973401666111126e-06, "loss": 0.7858, "step": 14627 }, { "epoch": 0.516687344639254, "grad_norm": 1.6624853610992432, "learning_rate": 4.972829664501801e-06, "loss": 0.798, "step": 14628 }, { "epoch": 0.5167226664429619, "grad_norm": 1.6102993488311768, "learning_rate": 4.972257663248077e-06, "loss": 0.7788, "step": 14629 }, { "epoch": 0.5167579882466699, "grad_norm": 1.7370253801345825, "learning_rate": 4.97168566235744e-06, "loss": 0.7805, "step": 14630 }, { "epoch": 0.5167933100503778, "grad_norm": 1.8086837530136108, "learning_rate": 4.971113661837375e-06, "loss": 0.8094, "step": 14631 }, { "epoch": 0.5168286318540857, "grad_norm": 1.8788269758224487, "learning_rate": 4.9705416616953695e-06, "loss": 0.8228, "step": 14632 }, { "epoch": 0.5168639536577936, "grad_norm": 2.2943673133850098, "learning_rate": 4.9699696619389096e-06, "loss": 0.7941, "step": 14633 }, { "epoch": 0.5168992754615015, "grad_norm": 1.8595813512802124, "learning_rate": 4.969397662575479e-06, "loss": 0.8178, "step": 14634 }, { "epoch": 0.5169345972652093, "grad_norm": 1.6930315494537354, "learning_rate": 4.9688256636125685e-06, "loss": 0.7824, "step": 14635 }, { "epoch": 0.5169699190689172, "grad_norm": 1.9210784435272217, "learning_rate": 4.968253665057662e-06, "loss": 0.7892, "step": 14636 }, { "epoch": 0.5170052408726251, "grad_norm": 1.564746618270874, "learning_rate": 4.967681666918245e-06, "loss": 0.817, "step": 14637 }, { "epoch": 0.517040562676333, "grad_norm": 1.8586174249649048, "learning_rate": 4.967109669201803e-06, "loss": 0.7991, "step": 14638 }, { "epoch": 0.5170758844800409, "grad_norm": 1.807805061340332, "learning_rate": 4.9665376719158245e-06, "loss": 0.7854, "step": 14639 }, { "epoch": 0.5171112062837488, "grad_norm": 1.7768452167510986, "learning_rate": 4.9659656750677945e-06, "loss": 0.8031, "step": 14640 }, { "epoch": 0.5171465280874568, "grad_norm": 1.569961667060852, "learning_rate": 4.965393678665199e-06, "loss": 0.8015, "step": 14641 }, { "epoch": 0.5171818498911647, "grad_norm": 1.5917749404907227, "learning_rate": 4.964821682715524e-06, "loss": 0.7886, "step": 14642 }, { "epoch": 0.5172171716948726, "grad_norm": 1.6876827478408813, "learning_rate": 4.964249687226258e-06, "loss": 0.7585, "step": 14643 }, { "epoch": 0.5172524934985805, "grad_norm": 1.6441806554794312, "learning_rate": 4.963677692204883e-06, "loss": 0.7948, "step": 14644 }, { "epoch": 0.5172878153022884, "grad_norm": 1.7703235149383545, "learning_rate": 4.963105697658889e-06, "loss": 0.8073, "step": 14645 }, { "epoch": 0.5173231371059963, "grad_norm": 1.796578049659729, "learning_rate": 4.962533703595762e-06, "loss": 0.7956, "step": 14646 }, { "epoch": 0.5173584589097042, "grad_norm": 1.641671061515808, "learning_rate": 4.961961710022984e-06, "loss": 0.7938, "step": 14647 }, { "epoch": 0.5173937807134121, "grad_norm": 1.7377172708511353, "learning_rate": 4.961389716948046e-06, "loss": 0.8089, "step": 14648 }, { "epoch": 0.51742910251712, "grad_norm": 1.9218717813491821, "learning_rate": 4.960817724378432e-06, "loss": 0.8074, "step": 14649 }, { "epoch": 0.517464424320828, "grad_norm": 1.9232561588287354, "learning_rate": 4.960245732321627e-06, "loss": 0.8296, "step": 14650 }, { "epoch": 0.5174997461245359, "grad_norm": 1.638245701789856, "learning_rate": 4.959673740785118e-06, "loss": 0.8049, "step": 14651 }, { "epoch": 0.5175350679282438, "grad_norm": 1.589483618736267, "learning_rate": 4.959101749776393e-06, "loss": 0.7679, "step": 14652 }, { "epoch": 0.5175703897319517, "grad_norm": 1.667425274848938, "learning_rate": 4.958529759302936e-06, "loss": 0.7909, "step": 14653 }, { "epoch": 0.5176057115356596, "grad_norm": 1.5398199558258057, "learning_rate": 4.957957769372235e-06, "loss": 0.7873, "step": 14654 }, { "epoch": 0.5176410333393675, "grad_norm": 1.6575332880020142, "learning_rate": 4.957385779991774e-06, "loss": 0.8025, "step": 14655 }, { "epoch": 0.5176763551430754, "grad_norm": 1.6096535921096802, "learning_rate": 4.956813791169041e-06, "loss": 0.8046, "step": 14656 }, { "epoch": 0.5177116769467833, "grad_norm": 1.6503872871398926, "learning_rate": 4.956241802911519e-06, "loss": 0.7512, "step": 14657 }, { "epoch": 0.5177469987504912, "grad_norm": 1.6875370740890503, "learning_rate": 4.955669815226697e-06, "loss": 0.8443, "step": 14658 }, { "epoch": 0.5177823205541991, "grad_norm": 1.696637511253357, "learning_rate": 4.9550978281220595e-06, "loss": 0.8155, "step": 14659 }, { "epoch": 0.5178176423579071, "grad_norm": 1.7737493515014648, "learning_rate": 4.954525841605093e-06, "loss": 0.7981, "step": 14660 }, { "epoch": 0.5178529641616149, "grad_norm": 1.8472288846969604, "learning_rate": 4.953953855683285e-06, "loss": 0.8332, "step": 14661 }, { "epoch": 0.5178882859653228, "grad_norm": 1.7386243343353271, "learning_rate": 4.953381870364119e-06, "loss": 0.7945, "step": 14662 }, { "epoch": 0.5179236077690307, "grad_norm": 1.5018863677978516, "learning_rate": 4.952809885655083e-06, "loss": 0.7918, "step": 14663 }, { "epoch": 0.5179589295727386, "grad_norm": 1.6708886623382568, "learning_rate": 4.952237901563662e-06, "loss": 0.7803, "step": 14664 }, { "epoch": 0.5179942513764465, "grad_norm": 1.7143932580947876, "learning_rate": 4.951665918097342e-06, "loss": 0.7953, "step": 14665 }, { "epoch": 0.5180295731801544, "grad_norm": 1.9337388277053833, "learning_rate": 4.95109393526361e-06, "loss": 0.8195, "step": 14666 }, { "epoch": 0.5180648949838623, "grad_norm": 1.6294612884521484, "learning_rate": 4.950521953069952e-06, "loss": 0.7905, "step": 14667 }, { "epoch": 0.5181002167875702, "grad_norm": 1.7659447193145752, "learning_rate": 4.949949971523852e-06, "loss": 0.7863, "step": 14668 }, { "epoch": 0.5181355385912781, "grad_norm": 1.9048763513565063, "learning_rate": 4.949377990632798e-06, "loss": 0.7506, "step": 14669 }, { "epoch": 0.518170860394986, "grad_norm": 1.7403916120529175, "learning_rate": 4.9488060104042765e-06, "loss": 0.809, "step": 14670 }, { "epoch": 0.518206182198694, "grad_norm": 1.6765228509902954, "learning_rate": 4.94823403084577e-06, "loss": 0.8282, "step": 14671 }, { "epoch": 0.5182415040024019, "grad_norm": 1.7666889429092407, "learning_rate": 4.947662051964769e-06, "loss": 0.8395, "step": 14672 }, { "epoch": 0.5182768258061098, "grad_norm": 1.6043117046356201, "learning_rate": 4.9470900737687564e-06, "loss": 0.7863, "step": 14673 }, { "epoch": 0.5183121476098177, "grad_norm": 1.7586084604263306, "learning_rate": 4.94651809626522e-06, "loss": 0.7943, "step": 14674 }, { "epoch": 0.5183474694135256, "grad_norm": 1.7328616380691528, "learning_rate": 4.945946119461642e-06, "loss": 0.7751, "step": 14675 }, { "epoch": 0.5183827912172335, "grad_norm": 1.68661367893219, "learning_rate": 4.945374143365513e-06, "loss": 0.8019, "step": 14676 }, { "epoch": 0.5184181130209414, "grad_norm": 1.567778468132019, "learning_rate": 4.944802167984317e-06, "loss": 0.7705, "step": 14677 }, { "epoch": 0.5184534348246493, "grad_norm": 1.6520851850509644, "learning_rate": 4.944230193325539e-06, "loss": 0.7627, "step": 14678 }, { "epoch": 0.5184887566283573, "grad_norm": 1.5874946117401123, "learning_rate": 4.943658219396666e-06, "loss": 0.764, "step": 14679 }, { "epoch": 0.5185240784320652, "grad_norm": 1.6580251455307007, "learning_rate": 4.943086246205184e-06, "loss": 0.785, "step": 14680 }, { "epoch": 0.5185594002357731, "grad_norm": 1.670035719871521, "learning_rate": 4.942514273758577e-06, "loss": 0.7642, "step": 14681 }, { "epoch": 0.518594722039481, "grad_norm": 1.5622491836547852, "learning_rate": 4.941942302064334e-06, "loss": 0.7991, "step": 14682 }, { "epoch": 0.5186300438431889, "grad_norm": 1.665959358215332, "learning_rate": 4.941370331129939e-06, "loss": 0.7896, "step": 14683 }, { "epoch": 0.5186653656468968, "grad_norm": 1.8403315544128418, "learning_rate": 4.940798360962878e-06, "loss": 0.7658, "step": 14684 }, { "epoch": 0.5187006874506047, "grad_norm": 1.6217838525772095, "learning_rate": 4.940226391570637e-06, "loss": 0.7744, "step": 14685 }, { "epoch": 0.5187360092543126, "grad_norm": 2.109051465988159, "learning_rate": 4.939654422960701e-06, "loss": 0.7829, "step": 14686 }, { "epoch": 0.5187713310580204, "grad_norm": 1.6011404991149902, "learning_rate": 4.939082455140557e-06, "loss": 0.7877, "step": 14687 }, { "epoch": 0.5188066528617283, "grad_norm": 1.5641670227050781, "learning_rate": 4.938510488117691e-06, "loss": 0.7737, "step": 14688 }, { "epoch": 0.5188419746654362, "grad_norm": 1.617049217224121, "learning_rate": 4.937938521899587e-06, "loss": 0.7774, "step": 14689 }, { "epoch": 0.5188772964691442, "grad_norm": 0.9890633225440979, "learning_rate": 4.937366556493733e-06, "loss": 0.5828, "step": 14690 }, { "epoch": 0.5189126182728521, "grad_norm": 1.8757433891296387, "learning_rate": 4.936794591907613e-06, "loss": 0.7994, "step": 14691 }, { "epoch": 0.51894794007656, "grad_norm": 1.5699129104614258, "learning_rate": 4.936222628148716e-06, "loss": 0.7664, "step": 14692 }, { "epoch": 0.5189832618802679, "grad_norm": 1.5929903984069824, "learning_rate": 4.935650665224523e-06, "loss": 0.7838, "step": 14693 }, { "epoch": 0.5190185836839758, "grad_norm": 1.6635150909423828, "learning_rate": 4.935078703142522e-06, "loss": 0.7864, "step": 14694 }, { "epoch": 0.5190539054876837, "grad_norm": 1.729308009147644, "learning_rate": 4.9345067419101985e-06, "loss": 0.7753, "step": 14695 }, { "epoch": 0.5190892272913916, "grad_norm": 1.0203138589859009, "learning_rate": 4.933934781535039e-06, "loss": 0.5854, "step": 14696 }, { "epoch": 0.5191245490950995, "grad_norm": 1.4617607593536377, "learning_rate": 4.9333628220245285e-06, "loss": 0.7721, "step": 14697 }, { "epoch": 0.5191598708988074, "grad_norm": 1.7805006504058838, "learning_rate": 4.932790863386153e-06, "loss": 0.8169, "step": 14698 }, { "epoch": 0.5191951927025154, "grad_norm": 1.7522542476654053, "learning_rate": 4.932218905627397e-06, "loss": 0.813, "step": 14699 }, { "epoch": 0.5192305145062233, "grad_norm": 1.7111543416976929, "learning_rate": 4.931646948755748e-06, "loss": 0.7605, "step": 14700 }, { "epoch": 0.5192658363099312, "grad_norm": 1.8302885293960571, "learning_rate": 4.931074992778692e-06, "loss": 0.8276, "step": 14701 }, { "epoch": 0.5193011581136391, "grad_norm": 1.6073366403579712, "learning_rate": 4.930503037703712e-06, "loss": 0.7755, "step": 14702 }, { "epoch": 0.519336479917347, "grad_norm": 1.7669050693511963, "learning_rate": 4.929931083538296e-06, "loss": 0.8026, "step": 14703 }, { "epoch": 0.5193718017210549, "grad_norm": 1.5850404500961304, "learning_rate": 4.929359130289929e-06, "loss": 0.7786, "step": 14704 }, { "epoch": 0.5194071235247628, "grad_norm": 1.632615089416504, "learning_rate": 4.928787177966094e-06, "loss": 0.7809, "step": 14705 }, { "epoch": 0.5194424453284707, "grad_norm": 1.987396001815796, "learning_rate": 4.928215226574282e-06, "loss": 0.8194, "step": 14706 }, { "epoch": 0.5194777671321786, "grad_norm": 1.6174845695495605, "learning_rate": 4.927643276121974e-06, "loss": 0.7969, "step": 14707 }, { "epoch": 0.5195130889358865, "grad_norm": 1.5969562530517578, "learning_rate": 4.927071326616658e-06, "loss": 0.7972, "step": 14708 }, { "epoch": 0.5195484107395945, "grad_norm": 2.184617757797241, "learning_rate": 4.926499378065819e-06, "loss": 0.7664, "step": 14709 }, { "epoch": 0.5195837325433024, "grad_norm": 1.6837246417999268, "learning_rate": 4.9259274304769426e-06, "loss": 0.7724, "step": 14710 }, { "epoch": 0.5196190543470103, "grad_norm": 1.5198557376861572, "learning_rate": 4.925355483857514e-06, "loss": 0.7562, "step": 14711 }, { "epoch": 0.5196543761507182, "grad_norm": 1.6935995817184448, "learning_rate": 4.924783538215017e-06, "loss": 0.7533, "step": 14712 }, { "epoch": 0.519689697954426, "grad_norm": 1.5570907592773438, "learning_rate": 4.9242115935569405e-06, "loss": 0.7988, "step": 14713 }, { "epoch": 0.5197250197581339, "grad_norm": 1.592010736465454, "learning_rate": 4.9236396498907685e-06, "loss": 0.7732, "step": 14714 }, { "epoch": 0.5197603415618418, "grad_norm": 1.678019404411316, "learning_rate": 4.923067707223985e-06, "loss": 0.822, "step": 14715 }, { "epoch": 0.5197956633655497, "grad_norm": 1.5598769187927246, "learning_rate": 4.922495765564078e-06, "loss": 0.812, "step": 14716 }, { "epoch": 0.5198309851692576, "grad_norm": 1.6407866477966309, "learning_rate": 4.921923824918532e-06, "loss": 0.8094, "step": 14717 }, { "epoch": 0.5198663069729655, "grad_norm": 1.5108071565628052, "learning_rate": 4.921351885294831e-06, "loss": 0.7589, "step": 14718 }, { "epoch": 0.5199016287766735, "grad_norm": 1.5900346040725708, "learning_rate": 4.9207799467004635e-06, "loss": 0.7686, "step": 14719 }, { "epoch": 0.5199369505803814, "grad_norm": 1.7232029438018799, "learning_rate": 4.920208009142912e-06, "loss": 0.7755, "step": 14720 }, { "epoch": 0.5199722723840893, "grad_norm": 1.7292872667312622, "learning_rate": 4.919636072629663e-06, "loss": 0.7906, "step": 14721 }, { "epoch": 0.5200075941877972, "grad_norm": 1.7084763050079346, "learning_rate": 4.919064137168203e-06, "loss": 0.7995, "step": 14722 }, { "epoch": 0.5200429159915051, "grad_norm": 1.6553832292556763, "learning_rate": 4.918492202766015e-06, "loss": 0.8002, "step": 14723 }, { "epoch": 0.520078237795213, "grad_norm": 1.6713672876358032, "learning_rate": 4.917920269430586e-06, "loss": 0.7971, "step": 14724 }, { "epoch": 0.5201135595989209, "grad_norm": 1.6199209690093994, "learning_rate": 4.917348337169402e-06, "loss": 0.8089, "step": 14725 }, { "epoch": 0.5201488814026288, "grad_norm": 1.528428077697754, "learning_rate": 4.916776405989947e-06, "loss": 0.7798, "step": 14726 }, { "epoch": 0.5201842032063367, "grad_norm": 1.6724523305892944, "learning_rate": 4.916204475899707e-06, "loss": 0.805, "step": 14727 }, { "epoch": 0.5202195250100446, "grad_norm": 1.7192696332931519, "learning_rate": 4.915632546906167e-06, "loss": 0.7961, "step": 14728 }, { "epoch": 0.5202548468137526, "grad_norm": 1.6406617164611816, "learning_rate": 4.915060619016814e-06, "loss": 0.7875, "step": 14729 }, { "epoch": 0.5202901686174605, "grad_norm": 1.7527673244476318, "learning_rate": 4.914488692239129e-06, "loss": 0.7861, "step": 14730 }, { "epoch": 0.5203254904211684, "grad_norm": 1.5519285202026367, "learning_rate": 4.9139167665806e-06, "loss": 0.7822, "step": 14731 }, { "epoch": 0.5203608122248763, "grad_norm": 1.635103464126587, "learning_rate": 4.9133448420487136e-06, "loss": 0.7844, "step": 14732 }, { "epoch": 0.5203961340285842, "grad_norm": 1.6101948022842407, "learning_rate": 4.9127729186509525e-06, "loss": 0.7864, "step": 14733 }, { "epoch": 0.5204314558322921, "grad_norm": 1.463831901550293, "learning_rate": 4.912200996394804e-06, "loss": 0.7819, "step": 14734 }, { "epoch": 0.520466777636, "grad_norm": 1.5666288137435913, "learning_rate": 4.911629075287752e-06, "loss": 0.7675, "step": 14735 }, { "epoch": 0.5205020994397079, "grad_norm": 1.6201235055923462, "learning_rate": 4.911057155337281e-06, "loss": 0.7938, "step": 14736 }, { "epoch": 0.5205374212434158, "grad_norm": 1.7041250467300415, "learning_rate": 4.910485236550878e-06, "loss": 0.7946, "step": 14737 }, { "epoch": 0.5205727430471238, "grad_norm": 0.9350462555885315, "learning_rate": 4.909913318936029e-06, "loss": 0.5782, "step": 14738 }, { "epoch": 0.5206080648508316, "grad_norm": 1.811021327972412, "learning_rate": 4.909341402500214e-06, "loss": 0.7599, "step": 14739 }, { "epoch": 0.5206433866545395, "grad_norm": 1.6121920347213745, "learning_rate": 4.9087694872509245e-06, "loss": 0.7935, "step": 14740 }, { "epoch": 0.5206787084582474, "grad_norm": 1.758913516998291, "learning_rate": 4.908197573195643e-06, "loss": 0.7948, "step": 14741 }, { "epoch": 0.5207140302619553, "grad_norm": 1.5904902219772339, "learning_rate": 4.907625660341852e-06, "loss": 0.7839, "step": 14742 }, { "epoch": 0.5207493520656632, "grad_norm": 1.5544097423553467, "learning_rate": 4.907053748697041e-06, "loss": 0.793, "step": 14743 }, { "epoch": 0.5207846738693711, "grad_norm": 1.5938904285430908, "learning_rate": 4.9064818382686915e-06, "loss": 0.8066, "step": 14744 }, { "epoch": 0.520819995673079, "grad_norm": 1.749340534210205, "learning_rate": 4.905909929064291e-06, "loss": 0.749, "step": 14745 }, { "epoch": 0.5208553174767869, "grad_norm": 1.647428274154663, "learning_rate": 4.905338021091325e-06, "loss": 0.8122, "step": 14746 }, { "epoch": 0.5208906392804948, "grad_norm": 1.5814673900604248, "learning_rate": 4.904766114357277e-06, "loss": 0.8191, "step": 14747 }, { "epoch": 0.5209259610842027, "grad_norm": 1.841211199760437, "learning_rate": 4.9041942088696315e-06, "loss": 0.7503, "step": 14748 }, { "epoch": 0.5209612828879107, "grad_norm": 1.57097589969635, "learning_rate": 4.903622304635873e-06, "loss": 0.8032, "step": 14749 }, { "epoch": 0.5209966046916186, "grad_norm": 1.8674150705337524, "learning_rate": 4.903050401663489e-06, "loss": 0.8182, "step": 14750 }, { "epoch": 0.5210319264953265, "grad_norm": 1.804818868637085, "learning_rate": 4.902478499959963e-06, "loss": 0.8188, "step": 14751 }, { "epoch": 0.5210672482990344, "grad_norm": 1.558147668838501, "learning_rate": 4.901906599532779e-06, "loss": 0.7546, "step": 14752 }, { "epoch": 0.5211025701027423, "grad_norm": 1.6825928688049316, "learning_rate": 4.901334700389424e-06, "loss": 0.7588, "step": 14753 }, { "epoch": 0.5211378919064502, "grad_norm": 1.517719030380249, "learning_rate": 4.900762802537381e-06, "loss": 0.7694, "step": 14754 }, { "epoch": 0.5211732137101581, "grad_norm": 1.736993670463562, "learning_rate": 4.900190905984136e-06, "loss": 0.8101, "step": 14755 }, { "epoch": 0.521208535513866, "grad_norm": 1.6084893941879272, "learning_rate": 4.899619010737175e-06, "loss": 0.7707, "step": 14756 }, { "epoch": 0.5212438573175739, "grad_norm": 1.6858876943588257, "learning_rate": 4.899047116803979e-06, "loss": 0.8217, "step": 14757 }, { "epoch": 0.5212791791212819, "grad_norm": 3.4789955615997314, "learning_rate": 4.898475224192037e-06, "loss": 0.8209, "step": 14758 }, { "epoch": 0.5213145009249898, "grad_norm": 1.8265944719314575, "learning_rate": 4.897903332908833e-06, "loss": 0.7653, "step": 14759 }, { "epoch": 0.5213498227286977, "grad_norm": 1.7576569318771362, "learning_rate": 4.89733144296185e-06, "loss": 0.8136, "step": 14760 }, { "epoch": 0.5213851445324056, "grad_norm": 2.0557796955108643, "learning_rate": 4.8967595543585735e-06, "loss": 0.7771, "step": 14761 }, { "epoch": 0.5214204663361135, "grad_norm": 1.8140017986297607, "learning_rate": 4.89618766710649e-06, "loss": 0.8008, "step": 14762 }, { "epoch": 0.5214557881398214, "grad_norm": 1.8277957439422607, "learning_rate": 4.89561578121308e-06, "loss": 0.7844, "step": 14763 }, { "epoch": 0.5214911099435293, "grad_norm": 1.5510975122451782, "learning_rate": 4.895043896685833e-06, "loss": 0.7941, "step": 14764 }, { "epoch": 0.5215264317472371, "grad_norm": 1.5820256471633911, "learning_rate": 4.894472013532232e-06, "loss": 0.7771, "step": 14765 }, { "epoch": 0.521561753550945, "grad_norm": 1.554916501045227, "learning_rate": 4.893900131759763e-06, "loss": 0.7732, "step": 14766 }, { "epoch": 0.5215970753546529, "grad_norm": 1.6370915174484253, "learning_rate": 4.893328251375906e-06, "loss": 0.7915, "step": 14767 }, { "epoch": 0.5216323971583608, "grad_norm": 1.5569199323654175, "learning_rate": 4.89275637238815e-06, "loss": 0.7764, "step": 14768 }, { "epoch": 0.5216677189620688, "grad_norm": 1.630190134048462, "learning_rate": 4.8921844948039785e-06, "loss": 0.7599, "step": 14769 }, { "epoch": 0.5217030407657767, "grad_norm": 1.847331166267395, "learning_rate": 4.891612618630875e-06, "loss": 0.8387, "step": 14770 }, { "epoch": 0.5217383625694846, "grad_norm": 1.760217308998108, "learning_rate": 4.891040743876326e-06, "loss": 0.8165, "step": 14771 }, { "epoch": 0.5217736843731925, "grad_norm": 1.5492202043533325, "learning_rate": 4.890468870547816e-06, "loss": 0.7818, "step": 14772 }, { "epoch": 0.5218090061769004, "grad_norm": 1.5650932788848877, "learning_rate": 4.889896998652827e-06, "loss": 0.801, "step": 14773 }, { "epoch": 0.5218443279806083, "grad_norm": 1.5425622463226318, "learning_rate": 4.8893251281988465e-06, "loss": 0.7719, "step": 14774 }, { "epoch": 0.5218796497843162, "grad_norm": 1.623599648475647, "learning_rate": 4.888753259193359e-06, "loss": 0.7903, "step": 14775 }, { "epoch": 0.5219149715880241, "grad_norm": 1.5074621438980103, "learning_rate": 4.888181391643846e-06, "loss": 0.7846, "step": 14776 }, { "epoch": 0.521950293391732, "grad_norm": 1.6505221128463745, "learning_rate": 4.887609525557795e-06, "loss": 0.7712, "step": 14777 }, { "epoch": 0.52198561519544, "grad_norm": 1.7368921041488647, "learning_rate": 4.887037660942688e-06, "loss": 0.7678, "step": 14778 }, { "epoch": 0.5220209369991479, "grad_norm": 1.4998852014541626, "learning_rate": 4.886465797806013e-06, "loss": 0.7746, "step": 14779 }, { "epoch": 0.5220562588028558, "grad_norm": 1.625968098640442, "learning_rate": 4.885893936155252e-06, "loss": 0.7902, "step": 14780 }, { "epoch": 0.5220915806065637, "grad_norm": 1.6599609851837158, "learning_rate": 4.885322075997887e-06, "loss": 0.7695, "step": 14781 }, { "epoch": 0.5221269024102716, "grad_norm": 1.671874761581421, "learning_rate": 4.884750217341408e-06, "loss": 0.7857, "step": 14782 }, { "epoch": 0.5221622242139795, "grad_norm": 1.6376264095306396, "learning_rate": 4.884178360193296e-06, "loss": 0.8216, "step": 14783 }, { "epoch": 0.5221975460176874, "grad_norm": 1.670651912689209, "learning_rate": 4.883606504561037e-06, "loss": 0.7801, "step": 14784 }, { "epoch": 0.5222328678213953, "grad_norm": 1.580176830291748, "learning_rate": 4.883034650452113e-06, "loss": 0.7592, "step": 14785 }, { "epoch": 0.5222681896251032, "grad_norm": 2.31836199760437, "learning_rate": 4.882462797874008e-06, "loss": 0.8092, "step": 14786 }, { "epoch": 0.5223035114288112, "grad_norm": 1.801429271697998, "learning_rate": 4.8818909468342095e-06, "loss": 0.7925, "step": 14787 }, { "epoch": 0.5223388332325191, "grad_norm": 1.6871167421340942, "learning_rate": 4.881319097340199e-06, "loss": 0.8202, "step": 14788 }, { "epoch": 0.522374155036227, "grad_norm": 1.6941286325454712, "learning_rate": 4.880747249399463e-06, "loss": 0.7929, "step": 14789 }, { "epoch": 0.5224094768399349, "grad_norm": 1.6585521697998047, "learning_rate": 4.880175403019484e-06, "loss": 0.8213, "step": 14790 }, { "epoch": 0.5224447986436427, "grad_norm": 1.6732428073883057, "learning_rate": 4.879603558207746e-06, "loss": 0.7783, "step": 14791 }, { "epoch": 0.5224801204473506, "grad_norm": 1.9322727918624878, "learning_rate": 4.8790317149717355e-06, "loss": 0.778, "step": 14792 }, { "epoch": 0.5225154422510585, "grad_norm": 1.9374202489852905, "learning_rate": 4.8784598733189346e-06, "loss": 0.7851, "step": 14793 }, { "epoch": 0.5225507640547664, "grad_norm": 1.7439411878585815, "learning_rate": 4.877888033256828e-06, "loss": 0.8042, "step": 14794 }, { "epoch": 0.5225860858584743, "grad_norm": 1.7914036512374878, "learning_rate": 4.877316194792899e-06, "loss": 0.8115, "step": 14795 }, { "epoch": 0.5226214076621822, "grad_norm": 0.9700219035148621, "learning_rate": 4.876744357934634e-06, "loss": 0.6145, "step": 14796 }, { "epoch": 0.5226567294658901, "grad_norm": 1.837859869003296, "learning_rate": 4.876172522689514e-06, "loss": 0.7926, "step": 14797 }, { "epoch": 0.5226920512695981, "grad_norm": 1.8693907260894775, "learning_rate": 4.8756006890650265e-06, "loss": 0.8072, "step": 14798 }, { "epoch": 0.522727373073306, "grad_norm": 1.6640459299087524, "learning_rate": 4.875028857068654e-06, "loss": 0.8057, "step": 14799 }, { "epoch": 0.5227626948770139, "grad_norm": 0.9279845356941223, "learning_rate": 4.8744570267078776e-06, "loss": 0.5923, "step": 14800 }, { "epoch": 0.5227980166807218, "grad_norm": 1.7180521488189697, "learning_rate": 4.873885197990186e-06, "loss": 0.7719, "step": 14801 }, { "epoch": 0.5228333384844297, "grad_norm": 1.9660029411315918, "learning_rate": 4.8733133709230605e-06, "loss": 0.7881, "step": 14802 }, { "epoch": 0.5228686602881376, "grad_norm": 1.8761754035949707, "learning_rate": 4.872741545513989e-06, "loss": 0.7962, "step": 14803 }, { "epoch": 0.5229039820918455, "grad_norm": 1.6502538919448853, "learning_rate": 4.872169721770449e-06, "loss": 0.7742, "step": 14804 }, { "epoch": 0.5229393038955534, "grad_norm": 1.6003494262695312, "learning_rate": 4.871597899699929e-06, "loss": 0.8328, "step": 14805 }, { "epoch": 0.5229746256992613, "grad_norm": 1.6564782857894897, "learning_rate": 4.871026079309911e-06, "loss": 0.8073, "step": 14806 }, { "epoch": 0.5230099475029693, "grad_norm": 1.854169487953186, "learning_rate": 4.870454260607878e-06, "loss": 0.8174, "step": 14807 }, { "epoch": 0.5230452693066772, "grad_norm": 1.6946520805358887, "learning_rate": 4.869882443601317e-06, "loss": 0.7901, "step": 14808 }, { "epoch": 0.5230805911103851, "grad_norm": 1.757338047027588, "learning_rate": 4.86931062829771e-06, "loss": 0.8119, "step": 14809 }, { "epoch": 0.523115912914093, "grad_norm": 1.9920679330825806, "learning_rate": 4.868738814704539e-06, "loss": 0.8176, "step": 14810 }, { "epoch": 0.5231512347178009, "grad_norm": 2.0935003757476807, "learning_rate": 4.8681670028292925e-06, "loss": 0.7855, "step": 14811 }, { "epoch": 0.5231865565215088, "grad_norm": 1.6056480407714844, "learning_rate": 4.867595192679449e-06, "loss": 0.7994, "step": 14812 }, { "epoch": 0.5232218783252167, "grad_norm": 1.770490288734436, "learning_rate": 4.867023384262497e-06, "loss": 0.7811, "step": 14813 }, { "epoch": 0.5232572001289246, "grad_norm": 1.6255192756652832, "learning_rate": 4.866451577585916e-06, "loss": 0.7735, "step": 14814 }, { "epoch": 0.5232925219326325, "grad_norm": 1.6556074619293213, "learning_rate": 4.865879772657193e-06, "loss": 0.799, "step": 14815 }, { "epoch": 0.5233278437363404, "grad_norm": 1.9721850156784058, "learning_rate": 4.865307969483809e-06, "loss": 0.8138, "step": 14816 }, { "epoch": 0.5233631655400482, "grad_norm": 1.8109968900680542, "learning_rate": 4.864736168073251e-06, "loss": 0.8023, "step": 14817 }, { "epoch": 0.5233984873437562, "grad_norm": 1.5762983560562134, "learning_rate": 4.8641643684329984e-06, "loss": 0.8015, "step": 14818 }, { "epoch": 0.5234338091474641, "grad_norm": 1.7304538488388062, "learning_rate": 4.863592570570539e-06, "loss": 0.7884, "step": 14819 }, { "epoch": 0.523469130951172, "grad_norm": 1.6264376640319824, "learning_rate": 4.8630207744933535e-06, "loss": 0.7907, "step": 14820 }, { "epoch": 0.5235044527548799, "grad_norm": 1.6446950435638428, "learning_rate": 4.862448980208928e-06, "loss": 0.7395, "step": 14821 }, { "epoch": 0.5235397745585878, "grad_norm": 2.0573618412017822, "learning_rate": 4.861877187724742e-06, "loss": 0.77, "step": 14822 }, { "epoch": 0.5235750963622957, "grad_norm": 2.05816650390625, "learning_rate": 4.861305397048282e-06, "loss": 0.7912, "step": 14823 }, { "epoch": 0.5236104181660036, "grad_norm": 1.6409896612167358, "learning_rate": 4.8607336081870315e-06, "loss": 0.8064, "step": 14824 }, { "epoch": 0.5236457399697115, "grad_norm": 1.6505571603775024, "learning_rate": 4.860161821148472e-06, "loss": 0.8482, "step": 14825 }, { "epoch": 0.5236810617734194, "grad_norm": 1.5640002489089966, "learning_rate": 4.85959003594009e-06, "loss": 0.774, "step": 14826 }, { "epoch": 0.5237163835771274, "grad_norm": 1.8119486570358276, "learning_rate": 4.859018252569366e-06, "loss": 0.8453, "step": 14827 }, { "epoch": 0.5237517053808353, "grad_norm": 1.6137069463729858, "learning_rate": 4.8584464710437845e-06, "loss": 0.7727, "step": 14828 }, { "epoch": 0.5237870271845432, "grad_norm": 1.6312271356582642, "learning_rate": 4.8578746913708305e-06, "loss": 0.7701, "step": 14829 }, { "epoch": 0.5238223489882511, "grad_norm": 1.7121113538742065, "learning_rate": 4.857302913557985e-06, "loss": 0.783, "step": 14830 }, { "epoch": 0.523857670791959, "grad_norm": 0.9778971076011658, "learning_rate": 4.856731137612731e-06, "loss": 0.5528, "step": 14831 }, { "epoch": 0.5238929925956669, "grad_norm": 1.7744650840759277, "learning_rate": 4.8561593635425545e-06, "loss": 0.7792, "step": 14832 }, { "epoch": 0.5239283143993748, "grad_norm": 1.742286205291748, "learning_rate": 4.855587591354937e-06, "loss": 0.7648, "step": 14833 }, { "epoch": 0.5239636362030827, "grad_norm": 1.8365705013275146, "learning_rate": 4.85501582105736e-06, "loss": 0.7809, "step": 14834 }, { "epoch": 0.5239989580067906, "grad_norm": 1.6119358539581299, "learning_rate": 4.854444052657311e-06, "loss": 0.7867, "step": 14835 }, { "epoch": 0.5240342798104985, "grad_norm": 1.489560842514038, "learning_rate": 4.8538722861622685e-06, "loss": 0.7987, "step": 14836 }, { "epoch": 0.5240696016142065, "grad_norm": 1.6357322931289673, "learning_rate": 4.853300521579721e-06, "loss": 0.8174, "step": 14837 }, { "epoch": 0.5241049234179144, "grad_norm": 1.862273931503296, "learning_rate": 4.8527287589171474e-06, "loss": 0.8031, "step": 14838 }, { "epoch": 0.5241402452216223, "grad_norm": 1.5832910537719727, "learning_rate": 4.852156998182031e-06, "loss": 0.7804, "step": 14839 }, { "epoch": 0.5241755670253302, "grad_norm": 1.6000880002975464, "learning_rate": 4.85158523938186e-06, "loss": 0.8244, "step": 14840 }, { "epoch": 0.5242108888290381, "grad_norm": 1.7813228368759155, "learning_rate": 4.8510134825241095e-06, "loss": 0.8145, "step": 14841 }, { "epoch": 0.524246210632746, "grad_norm": 2.0562283992767334, "learning_rate": 4.850441727616268e-06, "loss": 0.8396, "step": 14842 }, { "epoch": 0.5242815324364538, "grad_norm": 1.6262271404266357, "learning_rate": 4.8498699746658175e-06, "loss": 0.7804, "step": 14843 }, { "epoch": 0.5243168542401617, "grad_norm": 1.660929560661316, "learning_rate": 4.849298223680239e-06, "loss": 0.7653, "step": 14844 }, { "epoch": 0.5243521760438696, "grad_norm": 1.773328423500061, "learning_rate": 4.848726474667017e-06, "loss": 0.7865, "step": 14845 }, { "epoch": 0.5243874978475775, "grad_norm": 1.760658621788025, "learning_rate": 4.848154727633635e-06, "loss": 0.8187, "step": 14846 }, { "epoch": 0.5244228196512855, "grad_norm": 1.7253602743148804, "learning_rate": 4.847582982587576e-06, "loss": 0.7696, "step": 14847 }, { "epoch": 0.5244581414549934, "grad_norm": 1.6181635856628418, "learning_rate": 4.847011239536322e-06, "loss": 0.8114, "step": 14848 }, { "epoch": 0.5244934632587013, "grad_norm": 1.8967896699905396, "learning_rate": 4.8464394984873555e-06, "loss": 0.8163, "step": 14849 }, { "epoch": 0.5245287850624092, "grad_norm": 1.587736964225769, "learning_rate": 4.845867759448161e-06, "loss": 0.7638, "step": 14850 }, { "epoch": 0.5245641068661171, "grad_norm": 1.8414462804794312, "learning_rate": 4.8452960224262195e-06, "loss": 0.8026, "step": 14851 }, { "epoch": 0.524599428669825, "grad_norm": 1.5936168432235718, "learning_rate": 4.844724287429015e-06, "loss": 0.8097, "step": 14852 }, { "epoch": 0.5246347504735329, "grad_norm": 1.6603270769119263, "learning_rate": 4.84415255446403e-06, "loss": 0.7799, "step": 14853 }, { "epoch": 0.5246700722772408, "grad_norm": 2.006589412689209, "learning_rate": 4.843580823538747e-06, "loss": 0.835, "step": 14854 }, { "epoch": 0.5247053940809487, "grad_norm": 1.6721537113189697, "learning_rate": 4.843009094660647e-06, "loss": 0.7818, "step": 14855 }, { "epoch": 0.5247407158846566, "grad_norm": 1.6294517517089844, "learning_rate": 4.842437367837217e-06, "loss": 0.8198, "step": 14856 }, { "epoch": 0.5247760376883646, "grad_norm": 1.504002332687378, "learning_rate": 4.841865643075936e-06, "loss": 0.7959, "step": 14857 }, { "epoch": 0.5248113594920725, "grad_norm": 1.8037817478179932, "learning_rate": 4.841293920384289e-06, "loss": 0.7891, "step": 14858 }, { "epoch": 0.5248466812957804, "grad_norm": 1.8289557695388794, "learning_rate": 4.840722199769755e-06, "loss": 0.7963, "step": 14859 }, { "epoch": 0.5248820030994883, "grad_norm": 1.6516060829162598, "learning_rate": 4.8401504812398205e-06, "loss": 0.7805, "step": 14860 }, { "epoch": 0.5249173249031962, "grad_norm": 1.6429100036621094, "learning_rate": 4.839578764801967e-06, "loss": 0.7668, "step": 14861 }, { "epoch": 0.5249526467069041, "grad_norm": 1.5780415534973145, "learning_rate": 4.839007050463674e-06, "loss": 0.752, "step": 14862 }, { "epoch": 0.524987968510612, "grad_norm": 0.8495854735374451, "learning_rate": 4.838435338232429e-06, "loss": 0.5947, "step": 14863 }, { "epoch": 0.5250232903143199, "grad_norm": 1.584550142288208, "learning_rate": 4.837863628115711e-06, "loss": 0.7638, "step": 14864 }, { "epoch": 0.5250586121180278, "grad_norm": 1.7043850421905518, "learning_rate": 4.837291920121003e-06, "loss": 0.767, "step": 14865 }, { "epoch": 0.5250939339217358, "grad_norm": 1.8205903768539429, "learning_rate": 4.8367202142557875e-06, "loss": 0.7895, "step": 14866 }, { "epoch": 0.5251292557254437, "grad_norm": 2.0221920013427734, "learning_rate": 4.836148510527548e-06, "loss": 0.7945, "step": 14867 }, { "epoch": 0.5251645775291516, "grad_norm": 1.6238486766815186, "learning_rate": 4.8355768089437654e-06, "loss": 0.7949, "step": 14868 }, { "epoch": 0.5251998993328595, "grad_norm": 1.7188260555267334, "learning_rate": 4.835005109511923e-06, "loss": 0.7902, "step": 14869 }, { "epoch": 0.5252352211365673, "grad_norm": 1.7332508563995361, "learning_rate": 4.834433412239502e-06, "loss": 0.8285, "step": 14870 }, { "epoch": 0.5252705429402752, "grad_norm": 1.743497610092163, "learning_rate": 4.8338617171339866e-06, "loss": 0.7986, "step": 14871 }, { "epoch": 0.5253058647439831, "grad_norm": 1.7225971221923828, "learning_rate": 4.833290024202857e-06, "loss": 0.7775, "step": 14872 }, { "epoch": 0.525341186547691, "grad_norm": 1.6818829774856567, "learning_rate": 4.832718333453595e-06, "loss": 0.7879, "step": 14873 }, { "epoch": 0.5253765083513989, "grad_norm": 1.5868322849273682, "learning_rate": 4.832146644893687e-06, "loss": 0.8061, "step": 14874 }, { "epoch": 0.5254118301551068, "grad_norm": 1.6487864255905151, "learning_rate": 4.831574958530611e-06, "loss": 0.7623, "step": 14875 }, { "epoch": 0.5254471519588148, "grad_norm": 1.739551305770874, "learning_rate": 4.831003274371851e-06, "loss": 0.7703, "step": 14876 }, { "epoch": 0.5254824737625227, "grad_norm": 1.752231478691101, "learning_rate": 4.830431592424888e-06, "loss": 0.7841, "step": 14877 }, { "epoch": 0.5255177955662306, "grad_norm": 1.5266109704971313, "learning_rate": 4.829859912697202e-06, "loss": 0.7461, "step": 14878 }, { "epoch": 0.5255531173699385, "grad_norm": 2.0972580909729004, "learning_rate": 4.82928823519628e-06, "loss": 0.785, "step": 14879 }, { "epoch": 0.5255884391736464, "grad_norm": 1.7042702436447144, "learning_rate": 4.8287165599296e-06, "loss": 0.7867, "step": 14880 }, { "epoch": 0.5256237609773543, "grad_norm": 1.764237642288208, "learning_rate": 4.828144886904646e-06, "loss": 0.7811, "step": 14881 }, { "epoch": 0.5256590827810622, "grad_norm": 1.8364652395248413, "learning_rate": 4.827573216128899e-06, "loss": 0.8328, "step": 14882 }, { "epoch": 0.5256944045847701, "grad_norm": 1.5920088291168213, "learning_rate": 4.827001547609841e-06, "loss": 0.7868, "step": 14883 }, { "epoch": 0.525729726388478, "grad_norm": 1.87506902217865, "learning_rate": 4.826429881354956e-06, "loss": 0.7713, "step": 14884 }, { "epoch": 0.525765048192186, "grad_norm": 1.5551629066467285, "learning_rate": 4.825858217371723e-06, "loss": 0.7651, "step": 14885 }, { "epoch": 0.5258003699958939, "grad_norm": 1.8240996599197388, "learning_rate": 4.825286555667624e-06, "loss": 0.7897, "step": 14886 }, { "epoch": 0.5258356917996018, "grad_norm": 1.7836380004882812, "learning_rate": 4.824714896250144e-06, "loss": 0.8065, "step": 14887 }, { "epoch": 0.5258710136033097, "grad_norm": 4.519360065460205, "learning_rate": 4.824143239126762e-06, "loss": 0.8099, "step": 14888 }, { "epoch": 0.5259063354070176, "grad_norm": 2.064340114593506, "learning_rate": 4.823571584304958e-06, "loss": 0.815, "step": 14889 }, { "epoch": 0.5259416572107255, "grad_norm": 1.7165945768356323, "learning_rate": 4.822999931792218e-06, "loss": 0.7991, "step": 14890 }, { "epoch": 0.5259769790144334, "grad_norm": 1.7872880697250366, "learning_rate": 4.822428281596022e-06, "loss": 0.8061, "step": 14891 }, { "epoch": 0.5260123008181413, "grad_norm": 1.8421612977981567, "learning_rate": 4.821856633723849e-06, "loss": 0.8276, "step": 14892 }, { "epoch": 0.5260476226218492, "grad_norm": 1.6447702646255493, "learning_rate": 4.8212849881831845e-06, "loss": 0.7711, "step": 14893 }, { "epoch": 0.5260829444255571, "grad_norm": 1.6784814596176147, "learning_rate": 4.8207133449815075e-06, "loss": 0.7828, "step": 14894 }, { "epoch": 0.526118266229265, "grad_norm": 1.6515045166015625, "learning_rate": 4.820141704126304e-06, "loss": 0.8102, "step": 14895 }, { "epoch": 0.5261535880329729, "grad_norm": 1.5420581102371216, "learning_rate": 4.819570065625048e-06, "loss": 0.7973, "step": 14896 }, { "epoch": 0.5261889098366808, "grad_norm": 1.9043625593185425, "learning_rate": 4.818998429485227e-06, "loss": 0.7783, "step": 14897 }, { "epoch": 0.5262242316403887, "grad_norm": 1.9132673740386963, "learning_rate": 4.818426795714321e-06, "loss": 0.7894, "step": 14898 }, { "epoch": 0.5262595534440966, "grad_norm": 1.601302981376648, "learning_rate": 4.817855164319809e-06, "loss": 0.8018, "step": 14899 }, { "epoch": 0.5262948752478045, "grad_norm": 1.7088637351989746, "learning_rate": 4.817283535309175e-06, "loss": 0.7759, "step": 14900 }, { "epoch": 0.5263301970515124, "grad_norm": 1.5890917778015137, "learning_rate": 4.8167119086899e-06, "loss": 0.7803, "step": 14901 }, { "epoch": 0.5263655188552203, "grad_norm": 2.8411102294921875, "learning_rate": 4.816140284469464e-06, "loss": 0.8145, "step": 14902 }, { "epoch": 0.5264008406589282, "grad_norm": 1.6453428268432617, "learning_rate": 4.815568662655351e-06, "loss": 0.76, "step": 14903 }, { "epoch": 0.5264361624626361, "grad_norm": 1.7230013608932495, "learning_rate": 4.81499704325504e-06, "loss": 0.8016, "step": 14904 }, { "epoch": 0.526471484266344, "grad_norm": 1.7786588668823242, "learning_rate": 4.814425426276013e-06, "loss": 0.7984, "step": 14905 }, { "epoch": 0.526506806070052, "grad_norm": 1.8518340587615967, "learning_rate": 4.813853811725751e-06, "loss": 0.7805, "step": 14906 }, { "epoch": 0.5265421278737599, "grad_norm": 1.585243821144104, "learning_rate": 4.813282199611735e-06, "loss": 0.7709, "step": 14907 }, { "epoch": 0.5265774496774678, "grad_norm": 1.7611502408981323, "learning_rate": 4.812710589941448e-06, "loss": 0.8306, "step": 14908 }, { "epoch": 0.5266127714811757, "grad_norm": 1.7397557497024536, "learning_rate": 4.8121389827223686e-06, "loss": 0.7969, "step": 14909 }, { "epoch": 0.5266480932848836, "grad_norm": 1.8356330394744873, "learning_rate": 4.811567377961978e-06, "loss": 0.772, "step": 14910 }, { "epoch": 0.5266834150885915, "grad_norm": 1.988250732421875, "learning_rate": 4.810995775667759e-06, "loss": 0.7438, "step": 14911 }, { "epoch": 0.5267187368922994, "grad_norm": 1.718565821647644, "learning_rate": 4.810424175847193e-06, "loss": 0.825, "step": 14912 }, { "epoch": 0.5267540586960073, "grad_norm": 1.4413902759552002, "learning_rate": 4.809852578507759e-06, "loss": 0.7452, "step": 14913 }, { "epoch": 0.5267893804997152, "grad_norm": 1.9236019849777222, "learning_rate": 4.809280983656938e-06, "loss": 0.7891, "step": 14914 }, { "epoch": 0.5268247023034232, "grad_norm": 1.7460191249847412, "learning_rate": 4.808709391302212e-06, "loss": 0.7834, "step": 14915 }, { "epoch": 0.5268600241071311, "grad_norm": 1.680620551109314, "learning_rate": 4.808137801451062e-06, "loss": 0.7917, "step": 14916 }, { "epoch": 0.526895345910839, "grad_norm": 1.6269806623458862, "learning_rate": 4.807566214110966e-06, "loss": 0.7663, "step": 14917 }, { "epoch": 0.5269306677145469, "grad_norm": 1.6953052282333374, "learning_rate": 4.80699462928941e-06, "loss": 0.8205, "step": 14918 }, { "epoch": 0.5269659895182548, "grad_norm": 1.8809932470321655, "learning_rate": 4.806423046993871e-06, "loss": 0.8098, "step": 14919 }, { "epoch": 0.5270013113219627, "grad_norm": 1.7787761688232422, "learning_rate": 4.80585146723183e-06, "loss": 0.799, "step": 14920 }, { "epoch": 0.5270366331256706, "grad_norm": 0.9409304261207581, "learning_rate": 4.80527989001077e-06, "loss": 0.5768, "step": 14921 }, { "epoch": 0.5270719549293784, "grad_norm": 1.6157150268554688, "learning_rate": 4.80470831533817e-06, "loss": 0.7963, "step": 14922 }, { "epoch": 0.5271072767330863, "grad_norm": 1.6200100183486938, "learning_rate": 4.804136743221509e-06, "loss": 0.8189, "step": 14923 }, { "epoch": 0.5271425985367942, "grad_norm": 1.7436039447784424, "learning_rate": 4.8035651736682715e-06, "loss": 0.7702, "step": 14924 }, { "epoch": 0.5271779203405021, "grad_norm": 1.7525084018707275, "learning_rate": 4.802993606685935e-06, "loss": 0.7682, "step": 14925 }, { "epoch": 0.5272132421442101, "grad_norm": 1.503404140472412, "learning_rate": 4.802422042281981e-06, "loss": 0.7848, "step": 14926 }, { "epoch": 0.527248563947918, "grad_norm": 1.6276623010635376, "learning_rate": 4.801850480463891e-06, "loss": 0.8106, "step": 14927 }, { "epoch": 0.5272838857516259, "grad_norm": 1.5355912446975708, "learning_rate": 4.801278921239144e-06, "loss": 0.8041, "step": 14928 }, { "epoch": 0.5273192075553338, "grad_norm": 1.8373603820800781, "learning_rate": 4.800707364615222e-06, "loss": 0.7971, "step": 14929 }, { "epoch": 0.5273545293590417, "grad_norm": 1.6766752004623413, "learning_rate": 4.800135810599604e-06, "loss": 0.7992, "step": 14930 }, { "epoch": 0.5273898511627496, "grad_norm": 1.6772849559783936, "learning_rate": 4.799564259199771e-06, "loss": 0.7688, "step": 14931 }, { "epoch": 0.5274251729664575, "grad_norm": 1.874273657798767, "learning_rate": 4.798992710423205e-06, "loss": 0.7944, "step": 14932 }, { "epoch": 0.5274604947701654, "grad_norm": 0.8852532505989075, "learning_rate": 4.798421164277382e-06, "loss": 0.5364, "step": 14933 }, { "epoch": 0.5274958165738733, "grad_norm": 1.6814230680465698, "learning_rate": 4.797849620769786e-06, "loss": 0.8118, "step": 14934 }, { "epoch": 0.5275311383775813, "grad_norm": 1.7751673460006714, "learning_rate": 4.797278079907897e-06, "loss": 0.7803, "step": 14935 }, { "epoch": 0.5275664601812892, "grad_norm": 3.0808956623077393, "learning_rate": 4.796706541699192e-06, "loss": 0.7685, "step": 14936 }, { "epoch": 0.5276017819849971, "grad_norm": 1.7814244031906128, "learning_rate": 4.796135006151155e-06, "loss": 0.756, "step": 14937 }, { "epoch": 0.527637103788705, "grad_norm": 1.827667236328125, "learning_rate": 4.795563473271264e-06, "loss": 0.7888, "step": 14938 }, { "epoch": 0.5276724255924129, "grad_norm": 1.753062129020691, "learning_rate": 4.794991943067001e-06, "loss": 0.7777, "step": 14939 }, { "epoch": 0.5277077473961208, "grad_norm": 1.9832545518875122, "learning_rate": 4.794420415545844e-06, "loss": 0.7827, "step": 14940 }, { "epoch": 0.5277430691998287, "grad_norm": 1.8315826654434204, "learning_rate": 4.793848890715274e-06, "loss": 0.8261, "step": 14941 }, { "epoch": 0.5277783910035366, "grad_norm": 1.8017661571502686, "learning_rate": 4.793277368582772e-06, "loss": 0.7856, "step": 14942 }, { "epoch": 0.5278137128072445, "grad_norm": 1.6220091581344604, "learning_rate": 4.792705849155816e-06, "loss": 0.7853, "step": 14943 }, { "epoch": 0.5278490346109525, "grad_norm": 1.5654575824737549, "learning_rate": 4.792134332441887e-06, "loss": 0.8394, "step": 14944 }, { "epoch": 0.5278843564146604, "grad_norm": 1.8499414920806885, "learning_rate": 4.791562818448465e-06, "loss": 0.8241, "step": 14945 }, { "epoch": 0.5279196782183683, "grad_norm": 1.689165472984314, "learning_rate": 4.79099130718303e-06, "loss": 0.817, "step": 14946 }, { "epoch": 0.5279550000220762, "grad_norm": 1.6526706218719482, "learning_rate": 4.790419798653061e-06, "loss": 0.817, "step": 14947 }, { "epoch": 0.527990321825784, "grad_norm": 1.6717506647109985, "learning_rate": 4.7898482928660375e-06, "loss": 0.7873, "step": 14948 }, { "epoch": 0.5280256436294919, "grad_norm": 1.9369279146194458, "learning_rate": 4.789276789829441e-06, "loss": 0.7896, "step": 14949 }, { "epoch": 0.5280609654331998, "grad_norm": 1.697892665863037, "learning_rate": 4.788705289550752e-06, "loss": 0.8089, "step": 14950 }, { "epoch": 0.5280962872369077, "grad_norm": 1.6613575220108032, "learning_rate": 4.788133792037446e-06, "loss": 0.822, "step": 14951 }, { "epoch": 0.5281316090406156, "grad_norm": 1.6017740964889526, "learning_rate": 4.787562297297006e-06, "loss": 0.7964, "step": 14952 }, { "epoch": 0.5281669308443235, "grad_norm": 1.6784048080444336, "learning_rate": 4.78699080533691e-06, "loss": 0.8164, "step": 14953 }, { "epoch": 0.5282022526480314, "grad_norm": 1.8633995056152344, "learning_rate": 4.7864193161646384e-06, "loss": 0.8176, "step": 14954 }, { "epoch": 0.5282375744517394, "grad_norm": 1.6436907052993774, "learning_rate": 4.785847829787672e-06, "loss": 0.7543, "step": 14955 }, { "epoch": 0.5282728962554473, "grad_norm": 1.758246660232544, "learning_rate": 4.785276346213488e-06, "loss": 0.794, "step": 14956 }, { "epoch": 0.5283082180591552, "grad_norm": 1.796995997428894, "learning_rate": 4.7847048654495656e-06, "loss": 0.8212, "step": 14957 }, { "epoch": 0.5283435398628631, "grad_norm": 1.6992074251174927, "learning_rate": 4.784133387503387e-06, "loss": 0.8001, "step": 14958 }, { "epoch": 0.528378861666571, "grad_norm": 1.8253891468048096, "learning_rate": 4.78356191238243e-06, "loss": 0.7894, "step": 14959 }, { "epoch": 0.5284141834702789, "grad_norm": 1.681593656539917, "learning_rate": 4.782990440094172e-06, "loss": 0.8044, "step": 14960 }, { "epoch": 0.5284495052739868, "grad_norm": 2.5619702339172363, "learning_rate": 4.7824189706460966e-06, "loss": 0.7828, "step": 14961 }, { "epoch": 0.5284848270776947, "grad_norm": 1.8376845121383667, "learning_rate": 4.78184750404568e-06, "loss": 0.7742, "step": 14962 }, { "epoch": 0.5285201488814026, "grad_norm": 2.926758050918579, "learning_rate": 4.7812760403004025e-06, "loss": 0.8336, "step": 14963 }, { "epoch": 0.5285554706851106, "grad_norm": 1.6448763608932495, "learning_rate": 4.7807045794177435e-06, "loss": 0.8003, "step": 14964 }, { "epoch": 0.5285907924888185, "grad_norm": 2.6213324069976807, "learning_rate": 4.78013312140518e-06, "loss": 0.8306, "step": 14965 }, { "epoch": 0.5286261142925264, "grad_norm": 1.742243766784668, "learning_rate": 4.779561666270195e-06, "loss": 0.8, "step": 14966 }, { "epoch": 0.5286614360962343, "grad_norm": 1.9394422769546509, "learning_rate": 4.778990214020264e-06, "loss": 0.7919, "step": 14967 }, { "epoch": 0.5286967578999422, "grad_norm": 1.5874600410461426, "learning_rate": 4.778418764662867e-06, "loss": 0.7853, "step": 14968 }, { "epoch": 0.5287320797036501, "grad_norm": 1.6769345998764038, "learning_rate": 4.777847318205488e-06, "loss": 0.7959, "step": 14969 }, { "epoch": 0.528767401507358, "grad_norm": 1.5711137056350708, "learning_rate": 4.777275874655597e-06, "loss": 0.7695, "step": 14970 }, { "epoch": 0.5288027233110659, "grad_norm": 1.6112018823623657, "learning_rate": 4.77670443402068e-06, "loss": 0.7904, "step": 14971 }, { "epoch": 0.5288380451147738, "grad_norm": 1.7092684507369995, "learning_rate": 4.7761329963082105e-06, "loss": 0.7513, "step": 14972 }, { "epoch": 0.5288733669184817, "grad_norm": 1.6505579948425293, "learning_rate": 4.7755615615256715e-06, "loss": 0.7874, "step": 14973 }, { "epoch": 0.5289086887221895, "grad_norm": 1.7139732837677002, "learning_rate": 4.774990129680542e-06, "loss": 0.8335, "step": 14974 }, { "epoch": 0.5289440105258975, "grad_norm": 1.6001516580581665, "learning_rate": 4.774418700780297e-06, "loss": 0.7842, "step": 14975 }, { "epoch": 0.5289793323296054, "grad_norm": 1.9007272720336914, "learning_rate": 4.773847274832419e-06, "loss": 0.8033, "step": 14976 }, { "epoch": 0.5290146541333133, "grad_norm": 1.6444025039672852, "learning_rate": 4.773275851844386e-06, "loss": 0.7679, "step": 14977 }, { "epoch": 0.5290499759370212, "grad_norm": 1.738160252571106, "learning_rate": 4.772704431823675e-06, "loss": 0.7944, "step": 14978 }, { "epoch": 0.5290852977407291, "grad_norm": 1.6956853866577148, "learning_rate": 4.772133014777766e-06, "loss": 0.8421, "step": 14979 }, { "epoch": 0.529120619544437, "grad_norm": 1.7041651010513306, "learning_rate": 4.7715616007141375e-06, "loss": 0.8043, "step": 14980 }, { "epoch": 0.5291559413481449, "grad_norm": 1.5507826805114746, "learning_rate": 4.770990189640267e-06, "loss": 0.7938, "step": 14981 }, { "epoch": 0.5291912631518528, "grad_norm": 1.715710997581482, "learning_rate": 4.770418781563635e-06, "loss": 0.7544, "step": 14982 }, { "epoch": 0.5292265849555607, "grad_norm": 1.6902536153793335, "learning_rate": 4.769847376491718e-06, "loss": 0.7904, "step": 14983 }, { "epoch": 0.5292619067592687, "grad_norm": 1.7574670314788818, "learning_rate": 4.769275974431995e-06, "loss": 0.7998, "step": 14984 }, { "epoch": 0.5292972285629766, "grad_norm": 1.6216425895690918, "learning_rate": 4.768704575391945e-06, "loss": 0.7832, "step": 14985 }, { "epoch": 0.5293325503666845, "grad_norm": 1.624662160873413, "learning_rate": 4.768133179379046e-06, "loss": 0.7656, "step": 14986 }, { "epoch": 0.5293678721703924, "grad_norm": 1.9743525981903076, "learning_rate": 4.767561786400779e-06, "loss": 0.8031, "step": 14987 }, { "epoch": 0.5294031939741003, "grad_norm": 1.7167962789535522, "learning_rate": 4.766990396464617e-06, "loss": 0.7852, "step": 14988 }, { "epoch": 0.5294385157778082, "grad_norm": 1.917892336845398, "learning_rate": 4.766419009578041e-06, "loss": 0.7754, "step": 14989 }, { "epoch": 0.5294738375815161, "grad_norm": 1.672165870666504, "learning_rate": 4.765847625748529e-06, "loss": 0.7455, "step": 14990 }, { "epoch": 0.529509159385224, "grad_norm": 1.6942192316055298, "learning_rate": 4.765276244983559e-06, "loss": 0.7854, "step": 14991 }, { "epoch": 0.5295444811889319, "grad_norm": 1.7366284132003784, "learning_rate": 4.7647048672906105e-06, "loss": 0.8444, "step": 14992 }, { "epoch": 0.5295798029926398, "grad_norm": 1.6394716501235962, "learning_rate": 4.76413349267716e-06, "loss": 0.7824, "step": 14993 }, { "epoch": 0.5296151247963478, "grad_norm": 1.933397889137268, "learning_rate": 4.763562121150685e-06, "loss": 0.847, "step": 14994 }, { "epoch": 0.5296504466000557, "grad_norm": 1.748764991760254, "learning_rate": 4.762990752718666e-06, "loss": 0.826, "step": 14995 }, { "epoch": 0.5296857684037636, "grad_norm": 1.8110092878341675, "learning_rate": 4.7624193873885785e-06, "loss": 0.7529, "step": 14996 }, { "epoch": 0.5297210902074715, "grad_norm": 1.7265229225158691, "learning_rate": 4.761848025167901e-06, "loss": 0.7969, "step": 14997 }, { "epoch": 0.5297564120111794, "grad_norm": 1.6529936790466309, "learning_rate": 4.761276666064114e-06, "loss": 0.7985, "step": 14998 }, { "epoch": 0.5297917338148873, "grad_norm": 1.7435673475265503, "learning_rate": 4.760705310084691e-06, "loss": 0.7761, "step": 14999 }, { "epoch": 0.5298270556185951, "grad_norm": 1.8107057809829712, "learning_rate": 4.760133957237112e-06, "loss": 0.789, "step": 15000 }, { "epoch": 0.529862377422303, "grad_norm": 1.581171989440918, "learning_rate": 4.759562607528856e-06, "loss": 0.7827, "step": 15001 }, { "epoch": 0.5298976992260109, "grad_norm": 1.5483801364898682, "learning_rate": 4.758991260967398e-06, "loss": 0.7834, "step": 15002 }, { "epoch": 0.5299330210297188, "grad_norm": 1.5996495485305786, "learning_rate": 4.758419917560218e-06, "loss": 0.8197, "step": 15003 }, { "epoch": 0.5299683428334268, "grad_norm": 1.6920467615127563, "learning_rate": 4.757848577314793e-06, "loss": 0.7898, "step": 15004 }, { "epoch": 0.5300036646371347, "grad_norm": 1.7693760395050049, "learning_rate": 4.757277240238599e-06, "loss": 0.8047, "step": 15005 }, { "epoch": 0.5300389864408426, "grad_norm": 1.5546505451202393, "learning_rate": 4.756705906339116e-06, "loss": 0.7916, "step": 15006 }, { "epoch": 0.5300743082445505, "grad_norm": 1.5799657106399536, "learning_rate": 4.756134575623821e-06, "loss": 0.7951, "step": 15007 }, { "epoch": 0.5301096300482584, "grad_norm": 1.6790345907211304, "learning_rate": 4.75556324810019e-06, "loss": 0.7959, "step": 15008 }, { "epoch": 0.5301449518519663, "grad_norm": 1.5421969890594482, "learning_rate": 4.7549919237757e-06, "loss": 0.8098, "step": 15009 }, { "epoch": 0.5301802736556742, "grad_norm": 1.6889866590499878, "learning_rate": 4.754420602657831e-06, "loss": 0.7897, "step": 15010 }, { "epoch": 0.5302155954593821, "grad_norm": 1.5398966073989868, "learning_rate": 4.753849284754059e-06, "loss": 0.7743, "step": 15011 }, { "epoch": 0.53025091726309, "grad_norm": 1.6544853448867798, "learning_rate": 4.75327797007186e-06, "loss": 0.8191, "step": 15012 }, { "epoch": 0.530286239066798, "grad_norm": 1.6837416887283325, "learning_rate": 4.752706658618714e-06, "loss": 0.7768, "step": 15013 }, { "epoch": 0.5303215608705059, "grad_norm": 1.493125081062317, "learning_rate": 4.752135350402097e-06, "loss": 0.7694, "step": 15014 }, { "epoch": 0.5303568826742138, "grad_norm": 1.6020655632019043, "learning_rate": 4.751564045429484e-06, "loss": 0.7652, "step": 15015 }, { "epoch": 0.5303922044779217, "grad_norm": 1.5591856241226196, "learning_rate": 4.750992743708356e-06, "loss": 0.7803, "step": 15016 }, { "epoch": 0.5304275262816296, "grad_norm": 1.634176254272461, "learning_rate": 4.750421445246188e-06, "loss": 0.7912, "step": 15017 }, { "epoch": 0.5304628480853375, "grad_norm": 1.8526109457015991, "learning_rate": 4.749850150050456e-06, "loss": 0.8335, "step": 15018 }, { "epoch": 0.5304981698890454, "grad_norm": 1.533255696296692, "learning_rate": 4.749278858128639e-06, "loss": 0.7991, "step": 15019 }, { "epoch": 0.5305334916927533, "grad_norm": 1.8014132976531982, "learning_rate": 4.748707569488212e-06, "loss": 0.7861, "step": 15020 }, { "epoch": 0.5305688134964612, "grad_norm": 1.6955292224884033, "learning_rate": 4.748136284136654e-06, "loss": 0.8311, "step": 15021 }, { "epoch": 0.5306041353001691, "grad_norm": 1.9559051990509033, "learning_rate": 4.747565002081442e-06, "loss": 0.7527, "step": 15022 }, { "epoch": 0.530639457103877, "grad_norm": 2.0678248405456543, "learning_rate": 4.7469937233300504e-06, "loss": 0.7859, "step": 15023 }, { "epoch": 0.530674778907585, "grad_norm": 1.6411969661712646, "learning_rate": 4.74642244788996e-06, "loss": 0.7991, "step": 15024 }, { "epoch": 0.5307101007112929, "grad_norm": 1.7435849905014038, "learning_rate": 4.745851175768642e-06, "loss": 0.8139, "step": 15025 }, { "epoch": 0.5307454225150007, "grad_norm": 1.5899767875671387, "learning_rate": 4.745279906973577e-06, "loss": 0.7764, "step": 15026 }, { "epoch": 0.5307807443187086, "grad_norm": 2.0639688968658447, "learning_rate": 4.7447086415122415e-06, "loss": 0.806, "step": 15027 }, { "epoch": 0.5308160661224165, "grad_norm": 1.846774697303772, "learning_rate": 4.7441373793921095e-06, "loss": 0.8118, "step": 15028 }, { "epoch": 0.5308513879261244, "grad_norm": 1.664446234703064, "learning_rate": 4.7435661206206606e-06, "loss": 0.7918, "step": 15029 }, { "epoch": 0.5308867097298323, "grad_norm": 1.8007296323776245, "learning_rate": 4.742994865205369e-06, "loss": 0.8006, "step": 15030 }, { "epoch": 0.5309220315335402, "grad_norm": 1.6873579025268555, "learning_rate": 4.7424236131537125e-06, "loss": 0.7975, "step": 15031 }, { "epoch": 0.5309573533372481, "grad_norm": 1.7297390699386597, "learning_rate": 4.7418523644731686e-06, "loss": 0.7348, "step": 15032 }, { "epoch": 0.530992675140956, "grad_norm": 1.8434710502624512, "learning_rate": 4.741281119171211e-06, "loss": 0.8005, "step": 15033 }, { "epoch": 0.531027996944664, "grad_norm": 1.970821738243103, "learning_rate": 4.740709877255318e-06, "loss": 0.777, "step": 15034 }, { "epoch": 0.5310633187483719, "grad_norm": 1.6575809717178345, "learning_rate": 4.740138638732966e-06, "loss": 0.7704, "step": 15035 }, { "epoch": 0.5310986405520798, "grad_norm": 1.63797926902771, "learning_rate": 4.7395674036116306e-06, "loss": 0.7816, "step": 15036 }, { "epoch": 0.5311339623557877, "grad_norm": 1.6160269975662231, "learning_rate": 4.738996171898787e-06, "loss": 0.7797, "step": 15037 }, { "epoch": 0.5311692841594956, "grad_norm": 1.6183600425720215, "learning_rate": 4.738424943601913e-06, "loss": 0.7926, "step": 15038 }, { "epoch": 0.5312046059632035, "grad_norm": 1.6999059915542603, "learning_rate": 4.737853718728484e-06, "loss": 0.7826, "step": 15039 }, { "epoch": 0.5312399277669114, "grad_norm": 1.8680704832077026, "learning_rate": 4.737282497285977e-06, "loss": 0.7803, "step": 15040 }, { "epoch": 0.5312752495706193, "grad_norm": 1.640470027923584, "learning_rate": 4.736711279281867e-06, "loss": 0.7874, "step": 15041 }, { "epoch": 0.5313105713743272, "grad_norm": 1.960849642753601, "learning_rate": 4.7361400647236295e-06, "loss": 0.7701, "step": 15042 }, { "epoch": 0.5313458931780352, "grad_norm": 1.7293816804885864, "learning_rate": 4.735568853618744e-06, "loss": 0.7966, "step": 15043 }, { "epoch": 0.5313812149817431, "grad_norm": 1.717996597290039, "learning_rate": 4.734997645974681e-06, "loss": 0.774, "step": 15044 }, { "epoch": 0.531416536785451, "grad_norm": 1.7099573612213135, "learning_rate": 4.73442644179892e-06, "loss": 0.8013, "step": 15045 }, { "epoch": 0.5314518585891589, "grad_norm": 1.6551530361175537, "learning_rate": 4.7338552410989346e-06, "loss": 0.7998, "step": 15046 }, { "epoch": 0.5314871803928668, "grad_norm": 1.5886354446411133, "learning_rate": 4.7332840438822025e-06, "loss": 0.7843, "step": 15047 }, { "epoch": 0.5315225021965747, "grad_norm": 2.059819459915161, "learning_rate": 4.732712850156199e-06, "loss": 0.7992, "step": 15048 }, { "epoch": 0.5315578240002826, "grad_norm": 1.7462213039398193, "learning_rate": 4.7321416599283985e-06, "loss": 0.7935, "step": 15049 }, { "epoch": 0.5315931458039905, "grad_norm": 1.612111210823059, "learning_rate": 4.7315704732062784e-06, "loss": 0.7784, "step": 15050 }, { "epoch": 0.5316284676076984, "grad_norm": 2.352396249771118, "learning_rate": 4.730999289997314e-06, "loss": 0.767, "step": 15051 }, { "epoch": 0.5316637894114062, "grad_norm": 1.739828109741211, "learning_rate": 4.730428110308979e-06, "loss": 0.8157, "step": 15052 }, { "epoch": 0.5316991112151142, "grad_norm": 2.037686347961426, "learning_rate": 4.729856934148751e-06, "loss": 0.794, "step": 15053 }, { "epoch": 0.5317344330188221, "grad_norm": 1.6539030075073242, "learning_rate": 4.729285761524104e-06, "loss": 0.833, "step": 15054 }, { "epoch": 0.53176975482253, "grad_norm": 1.6172362565994263, "learning_rate": 4.728714592442515e-06, "loss": 0.7842, "step": 15055 }, { "epoch": 0.5318050766262379, "grad_norm": 1.8464701175689697, "learning_rate": 4.728143426911458e-06, "loss": 0.7947, "step": 15056 }, { "epoch": 0.5318403984299458, "grad_norm": 1.6976381540298462, "learning_rate": 4.727572264938408e-06, "loss": 0.8032, "step": 15057 }, { "epoch": 0.5318757202336537, "grad_norm": 1.8217957019805908, "learning_rate": 4.727001106530842e-06, "loss": 0.7852, "step": 15058 }, { "epoch": 0.5319110420373616, "grad_norm": 1.64649498462677, "learning_rate": 4.726429951696234e-06, "loss": 0.7827, "step": 15059 }, { "epoch": 0.5319463638410695, "grad_norm": 1.6473736763000488, "learning_rate": 4.7258588004420584e-06, "loss": 0.7984, "step": 15060 }, { "epoch": 0.5319816856447774, "grad_norm": 1.7928816080093384, "learning_rate": 4.725287652775795e-06, "loss": 0.8073, "step": 15061 }, { "epoch": 0.5320170074484853, "grad_norm": 1.8924508094787598, "learning_rate": 4.724716508704912e-06, "loss": 0.8013, "step": 15062 }, { "epoch": 0.5320523292521933, "grad_norm": 1.71006178855896, "learning_rate": 4.724145368236888e-06, "loss": 0.8014, "step": 15063 }, { "epoch": 0.5320876510559012, "grad_norm": 1.513567328453064, "learning_rate": 4.7235742313791965e-06, "loss": 0.8191, "step": 15064 }, { "epoch": 0.5321229728596091, "grad_norm": 1.6502771377563477, "learning_rate": 4.723003098139315e-06, "loss": 0.7684, "step": 15065 }, { "epoch": 0.532158294663317, "grad_norm": 1.5545285940170288, "learning_rate": 4.722431968524716e-06, "loss": 0.7569, "step": 15066 }, { "epoch": 0.5321936164670249, "grad_norm": 1.7158241271972656, "learning_rate": 4.721860842542875e-06, "loss": 0.8386, "step": 15067 }, { "epoch": 0.5322289382707328, "grad_norm": 1.5871903896331787, "learning_rate": 4.721289720201268e-06, "loss": 0.8201, "step": 15068 }, { "epoch": 0.5322642600744407, "grad_norm": 1.6292601823806763, "learning_rate": 4.720718601507369e-06, "loss": 0.7752, "step": 15069 }, { "epoch": 0.5322995818781486, "grad_norm": 1.8852275609970093, "learning_rate": 4.720147486468651e-06, "loss": 0.8679, "step": 15070 }, { "epoch": 0.5323349036818565, "grad_norm": 1.7240774631500244, "learning_rate": 4.719576375092591e-06, "loss": 0.7647, "step": 15071 }, { "epoch": 0.5323702254855645, "grad_norm": 1.791630744934082, "learning_rate": 4.7190052673866635e-06, "loss": 0.7992, "step": 15072 }, { "epoch": 0.5324055472892724, "grad_norm": 2.521815061569214, "learning_rate": 4.718434163358341e-06, "loss": 0.7764, "step": 15073 }, { "epoch": 0.5324408690929803, "grad_norm": 1.7183042764663696, "learning_rate": 4.717863063015099e-06, "loss": 0.7919, "step": 15074 }, { "epoch": 0.5324761908966882, "grad_norm": 1.7357031106948853, "learning_rate": 4.717291966364413e-06, "loss": 0.8172, "step": 15075 }, { "epoch": 0.5325115127003961, "grad_norm": 2.0084445476531982, "learning_rate": 4.716720873413755e-06, "loss": 0.788, "step": 15076 }, { "epoch": 0.532546834504104, "grad_norm": 1.7962818145751953, "learning_rate": 4.716149784170603e-06, "loss": 0.7908, "step": 15077 }, { "epoch": 0.5325821563078118, "grad_norm": 1.8935080766677856, "learning_rate": 4.715578698642428e-06, "loss": 0.7727, "step": 15078 }, { "epoch": 0.5326174781115197, "grad_norm": 1.6885656118392944, "learning_rate": 4.715007616836706e-06, "loss": 0.777, "step": 15079 }, { "epoch": 0.5326527999152276, "grad_norm": 1.6544543504714966, "learning_rate": 4.714436538760913e-06, "loss": 0.7852, "step": 15080 }, { "epoch": 0.5326881217189355, "grad_norm": 1.6683486700057983, "learning_rate": 4.713865464422519e-06, "loss": 0.7638, "step": 15081 }, { "epoch": 0.5327234435226434, "grad_norm": 1.7802178859710693, "learning_rate": 4.713294393829e-06, "loss": 0.8293, "step": 15082 }, { "epoch": 0.5327587653263514, "grad_norm": 1.7547653913497925, "learning_rate": 4.712723326987829e-06, "loss": 0.7718, "step": 15083 }, { "epoch": 0.5327940871300593, "grad_norm": 1.6792012453079224, "learning_rate": 4.712152263906482e-06, "loss": 0.8191, "step": 15084 }, { "epoch": 0.5328294089337672, "grad_norm": 1.571723222732544, "learning_rate": 4.7115812045924326e-06, "loss": 0.8255, "step": 15085 }, { "epoch": 0.5328647307374751, "grad_norm": 1.6704471111297607, "learning_rate": 4.711010149053153e-06, "loss": 0.7809, "step": 15086 }, { "epoch": 0.532900052541183, "grad_norm": 1.8679916858673096, "learning_rate": 4.710439097296119e-06, "loss": 0.79, "step": 15087 }, { "epoch": 0.5329353743448909, "grad_norm": 1.5797678232192993, "learning_rate": 4.709868049328802e-06, "loss": 0.7837, "step": 15088 }, { "epoch": 0.5329706961485988, "grad_norm": 1.6824506521224976, "learning_rate": 4.70929700515868e-06, "loss": 0.7947, "step": 15089 }, { "epoch": 0.5330060179523067, "grad_norm": 1.4995265007019043, "learning_rate": 4.7087259647932236e-06, "loss": 0.7715, "step": 15090 }, { "epoch": 0.5330413397560146, "grad_norm": 1.6718273162841797, "learning_rate": 4.708154928239905e-06, "loss": 0.8114, "step": 15091 }, { "epoch": 0.5330766615597226, "grad_norm": 1.9001562595367432, "learning_rate": 4.707583895506202e-06, "loss": 0.7634, "step": 15092 }, { "epoch": 0.5331119833634305, "grad_norm": 1.5851556062698364, "learning_rate": 4.707012866599586e-06, "loss": 0.7856, "step": 15093 }, { "epoch": 0.5331473051671384, "grad_norm": 1.5423078536987305, "learning_rate": 4.706441841527529e-06, "loss": 0.7842, "step": 15094 }, { "epoch": 0.5331826269708463, "grad_norm": 1.7277727127075195, "learning_rate": 4.7058708202975065e-06, "loss": 0.7787, "step": 15095 }, { "epoch": 0.5332179487745542, "grad_norm": 2.650902509689331, "learning_rate": 4.705299802916992e-06, "loss": 0.8273, "step": 15096 }, { "epoch": 0.5332532705782621, "grad_norm": 1.9991596937179565, "learning_rate": 4.704728789393457e-06, "loss": 0.7837, "step": 15097 }, { "epoch": 0.53328859238197, "grad_norm": 1.8085490465164185, "learning_rate": 4.704157779734379e-06, "loss": 0.7981, "step": 15098 }, { "epoch": 0.5333239141856779, "grad_norm": 1.5799834728240967, "learning_rate": 4.703586773947227e-06, "loss": 0.7937, "step": 15099 }, { "epoch": 0.5333592359893858, "grad_norm": 2.199276924133301, "learning_rate": 4.703015772039476e-06, "loss": 0.7913, "step": 15100 }, { "epoch": 0.5333945577930937, "grad_norm": 1.6611653566360474, "learning_rate": 4.702444774018597e-06, "loss": 0.7621, "step": 15101 }, { "epoch": 0.5334298795968017, "grad_norm": 1.6439882516860962, "learning_rate": 4.701873779892065e-06, "loss": 0.7381, "step": 15102 }, { "epoch": 0.5334652014005096, "grad_norm": 1.512314796447754, "learning_rate": 4.7013027896673544e-06, "loss": 0.7964, "step": 15103 }, { "epoch": 0.5335005232042174, "grad_norm": 1.7626721858978271, "learning_rate": 4.700731803351935e-06, "loss": 0.8213, "step": 15104 }, { "epoch": 0.5335358450079253, "grad_norm": 1.690053105354309, "learning_rate": 4.700160820953283e-06, "loss": 0.7832, "step": 15105 }, { "epoch": 0.5335711668116332, "grad_norm": 1.6654521226882935, "learning_rate": 4.699589842478869e-06, "loss": 0.7963, "step": 15106 }, { "epoch": 0.5336064886153411, "grad_norm": 1.6033670902252197, "learning_rate": 4.699018867936167e-06, "loss": 0.7671, "step": 15107 }, { "epoch": 0.533641810419049, "grad_norm": 1.5653076171875, "learning_rate": 4.6984478973326495e-06, "loss": 0.7827, "step": 15108 }, { "epoch": 0.5336771322227569, "grad_norm": 1.731576919555664, "learning_rate": 4.6978769306757895e-06, "loss": 0.793, "step": 15109 }, { "epoch": 0.5337124540264648, "grad_norm": 1.5700397491455078, "learning_rate": 4.697305967973058e-06, "loss": 0.7462, "step": 15110 }, { "epoch": 0.5337477758301727, "grad_norm": 2.0186831951141357, "learning_rate": 4.696735009231931e-06, "loss": 0.7997, "step": 15111 }, { "epoch": 0.5337830976338807, "grad_norm": 1.7881004810333252, "learning_rate": 4.696164054459877e-06, "loss": 0.8183, "step": 15112 }, { "epoch": 0.5338184194375886, "grad_norm": 2.5739753246307373, "learning_rate": 4.695593103664373e-06, "loss": 0.8167, "step": 15113 }, { "epoch": 0.5338537412412965, "grad_norm": 1.831275463104248, "learning_rate": 4.695022156852889e-06, "loss": 0.7746, "step": 15114 }, { "epoch": 0.5338890630450044, "grad_norm": 1.7319422960281372, "learning_rate": 4.694451214032895e-06, "loss": 0.7869, "step": 15115 }, { "epoch": 0.5339243848487123, "grad_norm": 1.6135272979736328, "learning_rate": 4.69388027521187e-06, "loss": 0.766, "step": 15116 }, { "epoch": 0.5339597066524202, "grad_norm": 1.7923588752746582, "learning_rate": 4.693309340397281e-06, "loss": 0.7833, "step": 15117 }, { "epoch": 0.5339950284561281, "grad_norm": 1.6045619249343872, "learning_rate": 4.692738409596602e-06, "loss": 0.819, "step": 15118 }, { "epoch": 0.534030350259836, "grad_norm": 1.6719390153884888, "learning_rate": 4.692167482817304e-06, "loss": 0.809, "step": 15119 }, { "epoch": 0.5340656720635439, "grad_norm": 1.8493928909301758, "learning_rate": 4.69159656006686e-06, "loss": 0.746, "step": 15120 }, { "epoch": 0.5341009938672518, "grad_norm": 1.6493850946426392, "learning_rate": 4.6910256413527424e-06, "loss": 0.8163, "step": 15121 }, { "epoch": 0.5341363156709598, "grad_norm": 1.6074308156967163, "learning_rate": 4.690454726682423e-06, "loss": 0.7702, "step": 15122 }, { "epoch": 0.5341716374746677, "grad_norm": 1.6774768829345703, "learning_rate": 4.6898838160633745e-06, "loss": 0.7858, "step": 15123 }, { "epoch": 0.5342069592783756, "grad_norm": 1.6669433116912842, "learning_rate": 4.689312909503068e-06, "loss": 0.7696, "step": 15124 }, { "epoch": 0.5342422810820835, "grad_norm": 1.5997544527053833, "learning_rate": 4.688742007008975e-06, "loss": 0.7862, "step": 15125 }, { "epoch": 0.5342776028857914, "grad_norm": 1.8518885374069214, "learning_rate": 4.688171108588569e-06, "loss": 0.8131, "step": 15126 }, { "epoch": 0.5343129246894993, "grad_norm": 1.7708107233047485, "learning_rate": 4.6876002142493216e-06, "loss": 0.7727, "step": 15127 }, { "epoch": 0.5343482464932072, "grad_norm": 1.804602026939392, "learning_rate": 4.687029323998702e-06, "loss": 0.8216, "step": 15128 }, { "epoch": 0.5343835682969151, "grad_norm": 1.7491507530212402, "learning_rate": 4.686458437844185e-06, "loss": 0.8119, "step": 15129 }, { "epoch": 0.5344188901006229, "grad_norm": 1.6429246664047241, "learning_rate": 4.6858875557932414e-06, "loss": 0.802, "step": 15130 }, { "epoch": 0.5344542119043308, "grad_norm": 1.818528413772583, "learning_rate": 4.685316677853341e-06, "loss": 0.797, "step": 15131 }, { "epoch": 0.5344895337080388, "grad_norm": 1.7711894512176514, "learning_rate": 4.6847458040319585e-06, "loss": 0.7801, "step": 15132 }, { "epoch": 0.5345248555117467, "grad_norm": 1.7307066917419434, "learning_rate": 4.6841749343365625e-06, "loss": 0.8533, "step": 15133 }, { "epoch": 0.5345601773154546, "grad_norm": 1.788948655128479, "learning_rate": 4.6836040687746255e-06, "loss": 0.7895, "step": 15134 }, { "epoch": 0.5345954991191625, "grad_norm": 0.9906092882156372, "learning_rate": 4.683033207353621e-06, "loss": 0.5618, "step": 15135 }, { "epoch": 0.5346308209228704, "grad_norm": 1.5973306894302368, "learning_rate": 4.682462350081017e-06, "loss": 0.7918, "step": 15136 }, { "epoch": 0.5346661427265783, "grad_norm": 1.6602085828781128, "learning_rate": 4.681891496964286e-06, "loss": 0.807, "step": 15137 }, { "epoch": 0.5347014645302862, "grad_norm": 1.6535121202468872, "learning_rate": 4.681320648010898e-06, "loss": 0.7831, "step": 15138 }, { "epoch": 0.5347367863339941, "grad_norm": 1.8608639240264893, "learning_rate": 4.6807498032283265e-06, "loss": 0.7924, "step": 15139 }, { "epoch": 0.534772108137702, "grad_norm": 1.6425896883010864, "learning_rate": 4.680178962624042e-06, "loss": 0.8083, "step": 15140 }, { "epoch": 0.53480742994141, "grad_norm": 1.4744952917099, "learning_rate": 4.679608126205514e-06, "loss": 0.7495, "step": 15141 }, { "epoch": 0.5348427517451179, "grad_norm": 1.7148900032043457, "learning_rate": 4.679037293980215e-06, "loss": 0.7954, "step": 15142 }, { "epoch": 0.5348780735488258, "grad_norm": 1.5730770826339722, "learning_rate": 4.678466465955617e-06, "loss": 0.8205, "step": 15143 }, { "epoch": 0.5349133953525337, "grad_norm": 1.6723628044128418, "learning_rate": 4.677895642139188e-06, "loss": 0.8074, "step": 15144 }, { "epoch": 0.5349487171562416, "grad_norm": 2.108242988586426, "learning_rate": 4.6773248225384e-06, "loss": 0.8014, "step": 15145 }, { "epoch": 0.5349840389599495, "grad_norm": 1.8479135036468506, "learning_rate": 4.676754007160725e-06, "loss": 0.7828, "step": 15146 }, { "epoch": 0.5350193607636574, "grad_norm": 1.833122730255127, "learning_rate": 4.676183196013632e-06, "loss": 0.7751, "step": 15147 }, { "epoch": 0.5350546825673653, "grad_norm": 1.5517555475234985, "learning_rate": 4.675612389104593e-06, "loss": 0.7718, "step": 15148 }, { "epoch": 0.5350900043710732, "grad_norm": 0.8962172269821167, "learning_rate": 4.675041586441077e-06, "loss": 0.5715, "step": 15149 }, { "epoch": 0.5351253261747811, "grad_norm": 1.715051531791687, "learning_rate": 4.674470788030557e-06, "loss": 0.7996, "step": 15150 }, { "epoch": 0.5351606479784891, "grad_norm": 3.774216413497925, "learning_rate": 4.6738999938805015e-06, "loss": 0.8301, "step": 15151 }, { "epoch": 0.535195969782197, "grad_norm": 1.7493082284927368, "learning_rate": 4.673329203998382e-06, "loss": 0.7822, "step": 15152 }, { "epoch": 0.5352312915859049, "grad_norm": 1.7122915983200073, "learning_rate": 4.67275841839167e-06, "loss": 0.8504, "step": 15153 }, { "epoch": 0.5352666133896128, "grad_norm": 1.808958649635315, "learning_rate": 4.672187637067832e-06, "loss": 0.7987, "step": 15154 }, { "epoch": 0.5353019351933207, "grad_norm": 1.8833861351013184, "learning_rate": 4.671616860034341e-06, "loss": 0.8195, "step": 15155 }, { "epoch": 0.5353372569970285, "grad_norm": 1.83497154712677, "learning_rate": 4.671046087298668e-06, "loss": 0.7748, "step": 15156 }, { "epoch": 0.5353725788007364, "grad_norm": 1.8366047143936157, "learning_rate": 4.67047531886828e-06, "loss": 0.785, "step": 15157 }, { "epoch": 0.5354079006044443, "grad_norm": 1.758083701133728, "learning_rate": 4.669904554750651e-06, "loss": 0.7514, "step": 15158 }, { "epoch": 0.5354432224081522, "grad_norm": 1.8195654153823853, "learning_rate": 4.6693337949532465e-06, "loss": 0.8007, "step": 15159 }, { "epoch": 0.5354785442118601, "grad_norm": 1.6337807178497314, "learning_rate": 4.668763039483542e-06, "loss": 0.7592, "step": 15160 }, { "epoch": 0.535513866015568, "grad_norm": 1.649620771408081, "learning_rate": 4.6681922883490034e-06, "loss": 0.7729, "step": 15161 }, { "epoch": 0.535549187819276, "grad_norm": 3.1237993240356445, "learning_rate": 4.6676215415571005e-06, "loss": 0.7876, "step": 15162 }, { "epoch": 0.5355845096229839, "grad_norm": 1.6489826440811157, "learning_rate": 4.667050799115306e-06, "loss": 0.7711, "step": 15163 }, { "epoch": 0.5356198314266918, "grad_norm": 1.6616652011871338, "learning_rate": 4.6664800610310875e-06, "loss": 0.7666, "step": 15164 }, { "epoch": 0.5356551532303997, "grad_norm": 1.7419908046722412, "learning_rate": 4.665909327311916e-06, "loss": 0.7947, "step": 15165 }, { "epoch": 0.5356904750341076, "grad_norm": 1.7576345205307007, "learning_rate": 4.6653385979652594e-06, "loss": 0.789, "step": 15166 }, { "epoch": 0.5357257968378155, "grad_norm": 1.793323278427124, "learning_rate": 4.664767872998589e-06, "loss": 0.7876, "step": 15167 }, { "epoch": 0.5357611186415234, "grad_norm": 1.7709869146347046, "learning_rate": 4.664197152419373e-06, "loss": 0.7743, "step": 15168 }, { "epoch": 0.5357964404452313, "grad_norm": 2.0256760120391846, "learning_rate": 4.663626436235082e-06, "loss": 0.8382, "step": 15169 }, { "epoch": 0.5358317622489392, "grad_norm": 1.6617382764816284, "learning_rate": 4.663055724453186e-06, "loss": 0.768, "step": 15170 }, { "epoch": 0.5358670840526472, "grad_norm": 1.775793194770813, "learning_rate": 4.662485017081151e-06, "loss": 0.7815, "step": 15171 }, { "epoch": 0.5359024058563551, "grad_norm": 1.5833067893981934, "learning_rate": 4.661914314126451e-06, "loss": 0.8031, "step": 15172 }, { "epoch": 0.535937727660063, "grad_norm": 1.7983447313308716, "learning_rate": 4.661343615596553e-06, "loss": 0.8113, "step": 15173 }, { "epoch": 0.5359730494637709, "grad_norm": 1.6701005697250366, "learning_rate": 4.660772921498925e-06, "loss": 0.7862, "step": 15174 }, { "epoch": 0.5360083712674788, "grad_norm": 1.7238872051239014, "learning_rate": 4.660202231841036e-06, "loss": 0.7905, "step": 15175 }, { "epoch": 0.5360436930711867, "grad_norm": 2.4398300647735596, "learning_rate": 4.659631546630358e-06, "loss": 0.7544, "step": 15176 }, { "epoch": 0.5360790148748946, "grad_norm": 1.5491962432861328, "learning_rate": 4.659060865874358e-06, "loss": 0.7789, "step": 15177 }, { "epoch": 0.5361143366786025, "grad_norm": 1.7357523441314697, "learning_rate": 4.658490189580505e-06, "loss": 0.7851, "step": 15178 }, { "epoch": 0.5361496584823104, "grad_norm": 1.9362940788269043, "learning_rate": 4.657919517756268e-06, "loss": 0.801, "step": 15179 }, { "epoch": 0.5361849802860184, "grad_norm": 1.625830054283142, "learning_rate": 4.657348850409117e-06, "loss": 0.801, "step": 15180 }, { "epoch": 0.5362203020897263, "grad_norm": 1.75800621509552, "learning_rate": 4.656778187546518e-06, "loss": 0.8397, "step": 15181 }, { "epoch": 0.5362556238934341, "grad_norm": 1.7576866149902344, "learning_rate": 4.656207529175943e-06, "loss": 0.8144, "step": 15182 }, { "epoch": 0.536290945697142, "grad_norm": 1.6726422309875488, "learning_rate": 4.655636875304859e-06, "loss": 0.7668, "step": 15183 }, { "epoch": 0.5363262675008499, "grad_norm": 1.7389330863952637, "learning_rate": 4.655066225940735e-06, "loss": 0.8092, "step": 15184 }, { "epoch": 0.5363615893045578, "grad_norm": 1.0306888818740845, "learning_rate": 4.654495581091039e-06, "loss": 0.6148, "step": 15185 }, { "epoch": 0.5363969111082657, "grad_norm": 1.6570194959640503, "learning_rate": 4.65392494076324e-06, "loss": 0.7923, "step": 15186 }, { "epoch": 0.5364322329119736, "grad_norm": 1.640867829322815, "learning_rate": 4.6533543049648066e-06, "loss": 0.8096, "step": 15187 }, { "epoch": 0.5364675547156815, "grad_norm": 1.5570826530456543, "learning_rate": 4.652783673703207e-06, "loss": 0.8007, "step": 15188 }, { "epoch": 0.5365028765193894, "grad_norm": 1.6401392221450806, "learning_rate": 4.6522130469859084e-06, "loss": 0.7915, "step": 15189 }, { "epoch": 0.5365381983230973, "grad_norm": 1.706991195678711, "learning_rate": 4.651642424820384e-06, "loss": 0.8223, "step": 15190 }, { "epoch": 0.5365735201268053, "grad_norm": 1.6364737749099731, "learning_rate": 4.651071807214094e-06, "loss": 0.7845, "step": 15191 }, { "epoch": 0.5366088419305132, "grad_norm": 1.5855522155761719, "learning_rate": 4.650501194174511e-06, "loss": 0.8091, "step": 15192 }, { "epoch": 0.5366441637342211, "grad_norm": 1.8018572330474854, "learning_rate": 4.6499305857091026e-06, "loss": 0.821, "step": 15193 }, { "epoch": 0.536679485537929, "grad_norm": 1.773586392402649, "learning_rate": 4.649359981825337e-06, "loss": 0.8081, "step": 15194 }, { "epoch": 0.5367148073416369, "grad_norm": 1.7068829536437988, "learning_rate": 4.648789382530683e-06, "loss": 0.7904, "step": 15195 }, { "epoch": 0.5367501291453448, "grad_norm": 1.8842437267303467, "learning_rate": 4.648218787832607e-06, "loss": 0.8065, "step": 15196 }, { "epoch": 0.5367854509490527, "grad_norm": 1.8112916946411133, "learning_rate": 4.647648197738578e-06, "loss": 0.7722, "step": 15197 }, { "epoch": 0.5368207727527606, "grad_norm": 1.6514860391616821, "learning_rate": 4.647077612256063e-06, "loss": 0.7563, "step": 15198 }, { "epoch": 0.5368560945564685, "grad_norm": 1.7625792026519775, "learning_rate": 4.646507031392528e-06, "loss": 0.8094, "step": 15199 }, { "epoch": 0.5368914163601765, "grad_norm": 1.6246106624603271, "learning_rate": 4.645936455155445e-06, "loss": 0.7901, "step": 15200 }, { "epoch": 0.5369267381638844, "grad_norm": 1.6513665914535522, "learning_rate": 4.645365883552278e-06, "loss": 0.8025, "step": 15201 }, { "epoch": 0.5369620599675923, "grad_norm": 1.742385745048523, "learning_rate": 4.644795316590495e-06, "loss": 0.825, "step": 15202 }, { "epoch": 0.5369973817713002, "grad_norm": 1.806015968322754, "learning_rate": 4.644224754277564e-06, "loss": 0.7734, "step": 15203 }, { "epoch": 0.5370327035750081, "grad_norm": 1.7919493913650513, "learning_rate": 4.643654196620954e-06, "loss": 0.7727, "step": 15204 }, { "epoch": 0.537068025378716, "grad_norm": 1.6711885929107666, "learning_rate": 4.6430836436281305e-06, "loss": 0.7474, "step": 15205 }, { "epoch": 0.5371033471824239, "grad_norm": 1.7009888887405396, "learning_rate": 4.6425130953065605e-06, "loss": 0.7696, "step": 15206 }, { "epoch": 0.5371386689861318, "grad_norm": 1.628004550933838, "learning_rate": 4.6419425516637116e-06, "loss": 0.7843, "step": 15207 }, { "epoch": 0.5371739907898396, "grad_norm": 1.8452507257461548, "learning_rate": 4.641372012707053e-06, "loss": 0.7434, "step": 15208 }, { "epoch": 0.5372093125935475, "grad_norm": 1.7609120607376099, "learning_rate": 4.64080147844405e-06, "loss": 0.7771, "step": 15209 }, { "epoch": 0.5372446343972554, "grad_norm": 1.7093173265457153, "learning_rate": 4.64023094888217e-06, "loss": 0.7843, "step": 15210 }, { "epoch": 0.5372799562009634, "grad_norm": 1.6066563129425049, "learning_rate": 4.639660424028878e-06, "loss": 0.7906, "step": 15211 }, { "epoch": 0.5373152780046713, "grad_norm": 1.697479009628296, "learning_rate": 4.6390899038916425e-06, "loss": 0.7617, "step": 15212 }, { "epoch": 0.5373505998083792, "grad_norm": 1.6538006067276, "learning_rate": 4.638519388477932e-06, "loss": 0.7879, "step": 15213 }, { "epoch": 0.5373859216120871, "grad_norm": 1.7164478302001953, "learning_rate": 4.637948877795211e-06, "loss": 0.7813, "step": 15214 }, { "epoch": 0.537421243415795, "grad_norm": 1.708606243133545, "learning_rate": 4.6373783718509465e-06, "loss": 0.8039, "step": 15215 }, { "epoch": 0.5374565652195029, "grad_norm": 1.6335108280181885, "learning_rate": 4.636807870652606e-06, "loss": 0.7743, "step": 15216 }, { "epoch": 0.5374918870232108, "grad_norm": 1.6449260711669922, "learning_rate": 4.636237374207655e-06, "loss": 0.7724, "step": 15217 }, { "epoch": 0.5375272088269187, "grad_norm": 1.6461632251739502, "learning_rate": 4.635666882523563e-06, "loss": 0.7789, "step": 15218 }, { "epoch": 0.5375625306306266, "grad_norm": 1.6684945821762085, "learning_rate": 4.635096395607793e-06, "loss": 0.7938, "step": 15219 }, { "epoch": 0.5375978524343346, "grad_norm": 1.755462646484375, "learning_rate": 4.634525913467813e-06, "loss": 0.7922, "step": 15220 }, { "epoch": 0.5376331742380425, "grad_norm": 1.6081196069717407, "learning_rate": 4.633955436111089e-06, "loss": 0.8052, "step": 15221 }, { "epoch": 0.5376684960417504, "grad_norm": 1.6729369163513184, "learning_rate": 4.633384963545088e-06, "loss": 0.8203, "step": 15222 }, { "epoch": 0.5377038178454583, "grad_norm": 1.6437861919403076, "learning_rate": 4.632814495777274e-06, "loss": 0.7924, "step": 15223 }, { "epoch": 0.5377391396491662, "grad_norm": 1.702561616897583, "learning_rate": 4.632244032815117e-06, "loss": 0.788, "step": 15224 }, { "epoch": 0.5377744614528741, "grad_norm": 1.5519070625305176, "learning_rate": 4.63167357466608e-06, "loss": 0.777, "step": 15225 }, { "epoch": 0.537809783256582, "grad_norm": 1.7506362199783325, "learning_rate": 4.631103121337629e-06, "loss": 0.7908, "step": 15226 }, { "epoch": 0.5378451050602899, "grad_norm": 1.6423087120056152, "learning_rate": 4.630532672837234e-06, "loss": 0.7882, "step": 15227 }, { "epoch": 0.5378804268639978, "grad_norm": 2.0302300453186035, "learning_rate": 4.629962229172357e-06, "loss": 0.7782, "step": 15228 }, { "epoch": 0.5379157486677058, "grad_norm": 1.8713889122009277, "learning_rate": 4.6293917903504634e-06, "loss": 0.7813, "step": 15229 }, { "epoch": 0.5379510704714137, "grad_norm": 1.7238073348999023, "learning_rate": 4.628821356379021e-06, "loss": 0.7997, "step": 15230 }, { "epoch": 0.5379863922751216, "grad_norm": 1.7797768115997314, "learning_rate": 4.628250927265495e-06, "loss": 0.83, "step": 15231 }, { "epoch": 0.5380217140788295, "grad_norm": 1.8460768461227417, "learning_rate": 4.6276805030173505e-06, "loss": 0.7758, "step": 15232 }, { "epoch": 0.5380570358825374, "grad_norm": 1.5711735486984253, "learning_rate": 4.627110083642053e-06, "loss": 0.7842, "step": 15233 }, { "epoch": 0.5380923576862452, "grad_norm": 1.6769717931747437, "learning_rate": 4.62653966914707e-06, "loss": 0.7978, "step": 15234 }, { "epoch": 0.5381276794899531, "grad_norm": 1.6223748922348022, "learning_rate": 4.625969259539866e-06, "loss": 0.7594, "step": 15235 }, { "epoch": 0.538163001293661, "grad_norm": 1.4855117797851562, "learning_rate": 4.625398854827904e-06, "loss": 0.7412, "step": 15236 }, { "epoch": 0.5381983230973689, "grad_norm": 1.6635384559631348, "learning_rate": 4.624828455018652e-06, "loss": 0.826, "step": 15237 }, { "epoch": 0.5382336449010768, "grad_norm": 1.648219347000122, "learning_rate": 4.6242580601195755e-06, "loss": 0.795, "step": 15238 }, { "epoch": 0.5382689667047847, "grad_norm": 1.7507115602493286, "learning_rate": 4.623687670138138e-06, "loss": 0.8385, "step": 15239 }, { "epoch": 0.5383042885084927, "grad_norm": 1.665258526802063, "learning_rate": 4.623117285081806e-06, "loss": 0.7726, "step": 15240 }, { "epoch": 0.5383396103122006, "grad_norm": 1.767665147781372, "learning_rate": 4.6225469049580435e-06, "loss": 0.7734, "step": 15241 }, { "epoch": 0.5383749321159085, "grad_norm": 1.6475435495376587, "learning_rate": 4.6219765297743176e-06, "loss": 0.7653, "step": 15242 }, { "epoch": 0.5384102539196164, "grad_norm": 1.7443207502365112, "learning_rate": 4.621406159538091e-06, "loss": 0.784, "step": 15243 }, { "epoch": 0.5384455757233243, "grad_norm": 1.6996068954467773, "learning_rate": 4.620835794256829e-06, "loss": 0.8148, "step": 15244 }, { "epoch": 0.5384808975270322, "grad_norm": 1.6235419511795044, "learning_rate": 4.620265433937997e-06, "loss": 0.825, "step": 15245 }, { "epoch": 0.5385162193307401, "grad_norm": 2.511467218399048, "learning_rate": 4.619695078589062e-06, "loss": 0.8064, "step": 15246 }, { "epoch": 0.538551541134448, "grad_norm": 1.5743435621261597, "learning_rate": 4.619124728217484e-06, "loss": 0.7813, "step": 15247 }, { "epoch": 0.5385868629381559, "grad_norm": 1.6768505573272705, "learning_rate": 4.61855438283073e-06, "loss": 0.8251, "step": 15248 }, { "epoch": 0.5386221847418639, "grad_norm": 1.6808167695999146, "learning_rate": 4.617984042436264e-06, "loss": 0.7805, "step": 15249 }, { "epoch": 0.5386575065455718, "grad_norm": 1.4830073118209839, "learning_rate": 4.617413707041553e-06, "loss": 0.7854, "step": 15250 }, { "epoch": 0.5386928283492797, "grad_norm": 1.5713051557540894, "learning_rate": 4.616843376654057e-06, "loss": 0.7417, "step": 15251 }, { "epoch": 0.5387281501529876, "grad_norm": 1.9077210426330566, "learning_rate": 4.6162730512812445e-06, "loss": 0.8192, "step": 15252 }, { "epoch": 0.5387634719566955, "grad_norm": 1.5653352737426758, "learning_rate": 4.6157027309305776e-06, "loss": 0.8139, "step": 15253 }, { "epoch": 0.5387987937604034, "grad_norm": 1.7665585279464722, "learning_rate": 4.615132415609521e-06, "loss": 0.7505, "step": 15254 }, { "epoch": 0.5388341155641113, "grad_norm": 4.760213375091553, "learning_rate": 4.614562105325539e-06, "loss": 0.831, "step": 15255 }, { "epoch": 0.5388694373678192, "grad_norm": 1.6585166454315186, "learning_rate": 4.613991800086097e-06, "loss": 0.8068, "step": 15256 }, { "epoch": 0.5389047591715271, "grad_norm": 1.607696294784546, "learning_rate": 4.613421499898656e-06, "loss": 0.8028, "step": 15257 }, { "epoch": 0.538940080975235, "grad_norm": 5.6929144859313965, "learning_rate": 4.612851204770682e-06, "loss": 0.8333, "step": 15258 }, { "epoch": 0.538975402778943, "grad_norm": 1.7427605390548706, "learning_rate": 4.612280914709641e-06, "loss": 0.7832, "step": 15259 }, { "epoch": 0.5390107245826508, "grad_norm": 1.6166892051696777, "learning_rate": 4.611710629722991e-06, "loss": 0.7711, "step": 15260 }, { "epoch": 0.5390460463863587, "grad_norm": 1.824744701385498, "learning_rate": 4.611140349818201e-06, "loss": 0.7642, "step": 15261 }, { "epoch": 0.5390813681900666, "grad_norm": 2.2967891693115234, "learning_rate": 4.610570075002734e-06, "loss": 0.7832, "step": 15262 }, { "epoch": 0.5391166899937745, "grad_norm": 1.6377620697021484, "learning_rate": 4.609999805284051e-06, "loss": 0.7655, "step": 15263 }, { "epoch": 0.5391520117974824, "grad_norm": 1.8174916505813599, "learning_rate": 4.609429540669619e-06, "loss": 0.7932, "step": 15264 }, { "epoch": 0.5391873336011903, "grad_norm": 1.6279397010803223, "learning_rate": 4.6088592811668986e-06, "loss": 0.7963, "step": 15265 }, { "epoch": 0.5392226554048982, "grad_norm": 1.6862213611602783, "learning_rate": 4.608289026783355e-06, "loss": 0.8174, "step": 15266 }, { "epoch": 0.5392579772086061, "grad_norm": 1.8533713817596436, "learning_rate": 4.607718777526449e-06, "loss": 0.7834, "step": 15267 }, { "epoch": 0.539293299012314, "grad_norm": 1.717563271522522, "learning_rate": 4.607148533403648e-06, "loss": 0.794, "step": 15268 }, { "epoch": 0.539328620816022, "grad_norm": 1.6351368427276611, "learning_rate": 4.606578294422412e-06, "loss": 0.7886, "step": 15269 }, { "epoch": 0.5393639426197299, "grad_norm": 1.473504900932312, "learning_rate": 4.606008060590205e-06, "loss": 0.7791, "step": 15270 }, { "epoch": 0.5393992644234378, "grad_norm": 1.5409637689590454, "learning_rate": 4.605437831914492e-06, "loss": 0.7807, "step": 15271 }, { "epoch": 0.5394345862271457, "grad_norm": 1.6488525867462158, "learning_rate": 4.604867608402734e-06, "loss": 0.7974, "step": 15272 }, { "epoch": 0.5394699080308536, "grad_norm": 1.599453091621399, "learning_rate": 4.6042973900623935e-06, "loss": 0.8006, "step": 15273 }, { "epoch": 0.5395052298345615, "grad_norm": 1.6342620849609375, "learning_rate": 4.603727176900935e-06, "loss": 0.7848, "step": 15274 }, { "epoch": 0.5395405516382694, "grad_norm": 1.6451258659362793, "learning_rate": 4.6031569689258206e-06, "loss": 0.7802, "step": 15275 }, { "epoch": 0.5395758734419773, "grad_norm": 1.6002994775772095, "learning_rate": 4.602586766144513e-06, "loss": 0.8169, "step": 15276 }, { "epoch": 0.5396111952456852, "grad_norm": 1.8301291465759277, "learning_rate": 4.6020165685644756e-06, "loss": 0.7939, "step": 15277 }, { "epoch": 0.5396465170493931, "grad_norm": 1.5880745649337769, "learning_rate": 4.60144637619317e-06, "loss": 0.7932, "step": 15278 }, { "epoch": 0.5396818388531011, "grad_norm": 1.6275938749313354, "learning_rate": 4.60087618903806e-06, "loss": 0.7766, "step": 15279 }, { "epoch": 0.539717160656809, "grad_norm": 1.5247881412506104, "learning_rate": 4.600306007106608e-06, "loss": 0.7972, "step": 15280 }, { "epoch": 0.5397524824605169, "grad_norm": 1.562740683555603, "learning_rate": 4.599735830406273e-06, "loss": 0.7756, "step": 15281 }, { "epoch": 0.5397878042642248, "grad_norm": 1.5848159790039062, "learning_rate": 4.599165658944523e-06, "loss": 0.7865, "step": 15282 }, { "epoch": 0.5398231260679327, "grad_norm": 1.740805745124817, "learning_rate": 4.598595492728818e-06, "loss": 0.7951, "step": 15283 }, { "epoch": 0.5398584478716406, "grad_norm": 1.7559269666671753, "learning_rate": 4.598025331766619e-06, "loss": 0.7784, "step": 15284 }, { "epoch": 0.5398937696753485, "grad_norm": 1.534012794494629, "learning_rate": 4.597455176065387e-06, "loss": 0.7552, "step": 15285 }, { "epoch": 0.5399290914790564, "grad_norm": 1.6265349388122559, "learning_rate": 4.596885025632588e-06, "loss": 0.8169, "step": 15286 }, { "epoch": 0.5399644132827642, "grad_norm": 1.5670595169067383, "learning_rate": 4.596314880475681e-06, "loss": 0.7931, "step": 15287 }, { "epoch": 0.5399997350864721, "grad_norm": 1.64461088180542, "learning_rate": 4.595744740602129e-06, "loss": 0.7942, "step": 15288 }, { "epoch": 0.54003505689018, "grad_norm": 1.6722216606140137, "learning_rate": 4.595174606019394e-06, "loss": 0.8163, "step": 15289 }, { "epoch": 0.540070378693888, "grad_norm": 1.77822744846344, "learning_rate": 4.594604476734939e-06, "loss": 0.7907, "step": 15290 }, { "epoch": 0.5401057004975959, "grad_norm": 1.6877644062042236, "learning_rate": 4.594034352756221e-06, "loss": 0.7889, "step": 15291 }, { "epoch": 0.5401410223013038, "grad_norm": 1.7092548608779907, "learning_rate": 4.5934642340907086e-06, "loss": 0.8181, "step": 15292 }, { "epoch": 0.5401763441050117, "grad_norm": 1.6920320987701416, "learning_rate": 4.592894120745859e-06, "loss": 0.7569, "step": 15293 }, { "epoch": 0.5402116659087196, "grad_norm": 1.5623056888580322, "learning_rate": 4.592324012729132e-06, "loss": 0.7544, "step": 15294 }, { "epoch": 0.5402469877124275, "grad_norm": 2.1139419078826904, "learning_rate": 4.591753910047994e-06, "loss": 0.798, "step": 15295 }, { "epoch": 0.5402823095161354, "grad_norm": 1.5853937864303589, "learning_rate": 4.5911838127099054e-06, "loss": 0.799, "step": 15296 }, { "epoch": 0.5403176313198433, "grad_norm": 1.4667885303497314, "learning_rate": 4.590613720722323e-06, "loss": 0.7888, "step": 15297 }, { "epoch": 0.5403529531235512, "grad_norm": 1.6566847562789917, "learning_rate": 4.590043634092714e-06, "loss": 0.7856, "step": 15298 }, { "epoch": 0.5403882749272592, "grad_norm": 1.5842570066452026, "learning_rate": 4.589473552828535e-06, "loss": 0.7852, "step": 15299 }, { "epoch": 0.5404235967309671, "grad_norm": 1.5438289642333984, "learning_rate": 4.58890347693725e-06, "loss": 0.786, "step": 15300 }, { "epoch": 0.540458918534675, "grad_norm": 1.6240415573120117, "learning_rate": 4.58833340642632e-06, "loss": 0.7752, "step": 15301 }, { "epoch": 0.5404942403383829, "grad_norm": 1.7388474941253662, "learning_rate": 4.587763341303204e-06, "loss": 0.7788, "step": 15302 }, { "epoch": 0.5405295621420908, "grad_norm": 2.4612200260162354, "learning_rate": 4.5871932815753645e-06, "loss": 0.817, "step": 15303 }, { "epoch": 0.5405648839457987, "grad_norm": 1.8292617797851562, "learning_rate": 4.58662322725026e-06, "loss": 0.775, "step": 15304 }, { "epoch": 0.5406002057495066, "grad_norm": 1.8372304439544678, "learning_rate": 4.586053178335354e-06, "loss": 0.8326, "step": 15305 }, { "epoch": 0.5406355275532145, "grad_norm": 1.756618618965149, "learning_rate": 4.5854831348381065e-06, "loss": 0.7771, "step": 15306 }, { "epoch": 0.5406708493569224, "grad_norm": 2.5554070472717285, "learning_rate": 4.584913096765977e-06, "loss": 0.8146, "step": 15307 }, { "epoch": 0.5407061711606304, "grad_norm": 1.7582718133926392, "learning_rate": 4.584343064126427e-06, "loss": 0.8169, "step": 15308 }, { "epoch": 0.5407414929643383, "grad_norm": 1.7727787494659424, "learning_rate": 4.583773036926917e-06, "loss": 0.7958, "step": 15309 }, { "epoch": 0.5407768147680462, "grad_norm": 1.6743030548095703, "learning_rate": 4.583203015174907e-06, "loss": 0.7816, "step": 15310 }, { "epoch": 0.5408121365717541, "grad_norm": 1.6710059642791748, "learning_rate": 4.582632998877859e-06, "loss": 0.8062, "step": 15311 }, { "epoch": 0.540847458375462, "grad_norm": 1.5894370079040527, "learning_rate": 4.58206298804323e-06, "loss": 0.7655, "step": 15312 }, { "epoch": 0.5408827801791698, "grad_norm": 1.6318411827087402, "learning_rate": 4.581492982678483e-06, "loss": 0.8178, "step": 15313 }, { "epoch": 0.5409181019828777, "grad_norm": 1.618520975112915, "learning_rate": 4.580922982791077e-06, "loss": 0.797, "step": 15314 }, { "epoch": 0.5409534237865856, "grad_norm": 1.7269697189331055, "learning_rate": 4.580352988388472e-06, "loss": 0.8004, "step": 15315 }, { "epoch": 0.5409887455902935, "grad_norm": 3.127178907394409, "learning_rate": 4.5797829994781285e-06, "loss": 0.7933, "step": 15316 }, { "epoch": 0.5410240673940014, "grad_norm": 1.5922744274139404, "learning_rate": 4.579213016067507e-06, "loss": 0.8104, "step": 15317 }, { "epoch": 0.5410593891977094, "grad_norm": 1.6178059577941895, "learning_rate": 4.578643038164064e-06, "loss": 0.7998, "step": 15318 }, { "epoch": 0.5410947110014173, "grad_norm": 2.0141987800598145, "learning_rate": 4.578073065775263e-06, "loss": 0.7998, "step": 15319 }, { "epoch": 0.5411300328051252, "grad_norm": 1.688764214515686, "learning_rate": 4.577503098908565e-06, "loss": 0.8112, "step": 15320 }, { "epoch": 0.5411653546088331, "grad_norm": 1.8427250385284424, "learning_rate": 4.576933137571424e-06, "loss": 0.8217, "step": 15321 }, { "epoch": 0.541200676412541, "grad_norm": 1.6801890134811401, "learning_rate": 4.576363181771303e-06, "loss": 0.7766, "step": 15322 }, { "epoch": 0.5412359982162489, "grad_norm": 1.7045687437057495, "learning_rate": 4.575793231515662e-06, "loss": 0.7833, "step": 15323 }, { "epoch": 0.5412713200199568, "grad_norm": 1.6827712059020996, "learning_rate": 4.575223286811958e-06, "loss": 0.7903, "step": 15324 }, { "epoch": 0.5413066418236647, "grad_norm": 1.7379308938980103, "learning_rate": 4.574653347667652e-06, "loss": 0.8035, "step": 15325 }, { "epoch": 0.5413419636273726, "grad_norm": 1.540024995803833, "learning_rate": 4.574083414090204e-06, "loss": 0.7943, "step": 15326 }, { "epoch": 0.5413772854310805, "grad_norm": 3.5235812664031982, "learning_rate": 4.573513486087072e-06, "loss": 0.779, "step": 15327 }, { "epoch": 0.5414126072347885, "grad_norm": 1.6528196334838867, "learning_rate": 4.572943563665714e-06, "loss": 0.7455, "step": 15328 }, { "epoch": 0.5414479290384964, "grad_norm": 1.663515329360962, "learning_rate": 4.572373646833592e-06, "loss": 0.8058, "step": 15329 }, { "epoch": 0.5414832508422043, "grad_norm": 1.6708309650421143, "learning_rate": 4.571803735598163e-06, "loss": 0.795, "step": 15330 }, { "epoch": 0.5415185726459122, "grad_norm": 1.766719102859497, "learning_rate": 4.571233829966885e-06, "loss": 0.8006, "step": 15331 }, { "epoch": 0.5415538944496201, "grad_norm": 1.5598506927490234, "learning_rate": 4.57066392994722e-06, "loss": 0.7873, "step": 15332 }, { "epoch": 0.541589216253328, "grad_norm": 1.695836067199707, "learning_rate": 4.570094035546623e-06, "loss": 0.7979, "step": 15333 }, { "epoch": 0.5416245380570359, "grad_norm": 1.4498451948165894, "learning_rate": 4.569524146772555e-06, "loss": 0.7895, "step": 15334 }, { "epoch": 0.5416598598607438, "grad_norm": 1.645289659500122, "learning_rate": 4.568954263632475e-06, "loss": 0.8037, "step": 15335 }, { "epoch": 0.5416951816644517, "grad_norm": 1.6773637533187866, "learning_rate": 4.568384386133839e-06, "loss": 0.7928, "step": 15336 }, { "epoch": 0.5417305034681597, "grad_norm": 1.5833277702331543, "learning_rate": 4.567814514284108e-06, "loss": 0.7637, "step": 15337 }, { "epoch": 0.5417658252718676, "grad_norm": 1.8360120058059692, "learning_rate": 4.56724464809074e-06, "loss": 0.7716, "step": 15338 }, { "epoch": 0.5418011470755754, "grad_norm": 1.4976187944412231, "learning_rate": 4.566674787561193e-06, "loss": 0.7632, "step": 15339 }, { "epoch": 0.5418364688792833, "grad_norm": 1.606629490852356, "learning_rate": 4.566104932702924e-06, "loss": 0.7838, "step": 15340 }, { "epoch": 0.5418717906829912, "grad_norm": 1.692296028137207, "learning_rate": 4.565535083523391e-06, "loss": 0.7814, "step": 15341 }, { "epoch": 0.5419071124866991, "grad_norm": 1.8831208944320679, "learning_rate": 4.564965240030055e-06, "loss": 0.7839, "step": 15342 }, { "epoch": 0.541942434290407, "grad_norm": 1.753018856048584, "learning_rate": 4.56439540223037e-06, "loss": 0.7741, "step": 15343 }, { "epoch": 0.5419777560941149, "grad_norm": 1.650443434715271, "learning_rate": 4.563825570131798e-06, "loss": 0.7687, "step": 15344 }, { "epoch": 0.5420130778978228, "grad_norm": 1.5094658136367798, "learning_rate": 4.563255743741794e-06, "loss": 0.7966, "step": 15345 }, { "epoch": 0.5420483997015307, "grad_norm": 1.5891599655151367, "learning_rate": 4.562685923067816e-06, "loss": 0.7742, "step": 15346 }, { "epoch": 0.5420837215052386, "grad_norm": 2.3315916061401367, "learning_rate": 4.5621161081173245e-06, "loss": 0.8202, "step": 15347 }, { "epoch": 0.5421190433089466, "grad_norm": 1.6710962057113647, "learning_rate": 4.561546298897774e-06, "loss": 0.7971, "step": 15348 }, { "epoch": 0.5421543651126545, "grad_norm": 1.7150521278381348, "learning_rate": 4.5609764954166225e-06, "loss": 0.7526, "step": 15349 }, { "epoch": 0.5421896869163624, "grad_norm": 1.711029052734375, "learning_rate": 4.560406697681329e-06, "loss": 0.7909, "step": 15350 }, { "epoch": 0.5422250087200703, "grad_norm": 1.532306432723999, "learning_rate": 4.5598369056993505e-06, "loss": 0.7572, "step": 15351 }, { "epoch": 0.5422603305237782, "grad_norm": 1.5921597480773926, "learning_rate": 4.559267119478142e-06, "loss": 0.7998, "step": 15352 }, { "epoch": 0.5422956523274861, "grad_norm": 1.833572506904602, "learning_rate": 4.5586973390251635e-06, "loss": 0.7937, "step": 15353 }, { "epoch": 0.542330974131194, "grad_norm": 1.6156400442123413, "learning_rate": 4.558127564347872e-06, "loss": 0.7681, "step": 15354 }, { "epoch": 0.5423662959349019, "grad_norm": 1.624776005744934, "learning_rate": 4.557557795453722e-06, "loss": 0.8061, "step": 15355 }, { "epoch": 0.5424016177386098, "grad_norm": 1.9059505462646484, "learning_rate": 4.556988032350175e-06, "loss": 0.7711, "step": 15356 }, { "epoch": 0.5424369395423178, "grad_norm": 1.5955675840377808, "learning_rate": 4.556418275044685e-06, "loss": 0.7585, "step": 15357 }, { "epoch": 0.5424722613460257, "grad_norm": 1.517439842224121, "learning_rate": 4.555848523544709e-06, "loss": 0.7803, "step": 15358 }, { "epoch": 0.5425075831497336, "grad_norm": 1.7482677698135376, "learning_rate": 4.555278777857702e-06, "loss": 0.7714, "step": 15359 }, { "epoch": 0.5425429049534415, "grad_norm": 1.7057052850723267, "learning_rate": 4.5547090379911245e-06, "loss": 0.7755, "step": 15360 }, { "epoch": 0.5425782267571494, "grad_norm": 1.7829135656356812, "learning_rate": 4.554139303952432e-06, "loss": 0.8072, "step": 15361 }, { "epoch": 0.5426135485608573, "grad_norm": 1.7996530532836914, "learning_rate": 4.553569575749078e-06, "loss": 0.8138, "step": 15362 }, { "epoch": 0.5426488703645652, "grad_norm": 1.8481031656265259, "learning_rate": 4.5529998533885235e-06, "loss": 0.791, "step": 15363 }, { "epoch": 0.5426841921682731, "grad_norm": 1.722711205482483, "learning_rate": 4.552430136878222e-06, "loss": 0.8103, "step": 15364 }, { "epoch": 0.5427195139719809, "grad_norm": 1.6460407972335815, "learning_rate": 4.551860426225631e-06, "loss": 0.8043, "step": 15365 }, { "epoch": 0.5427548357756888, "grad_norm": 1.7117247581481934, "learning_rate": 4.551290721438207e-06, "loss": 0.7844, "step": 15366 }, { "epoch": 0.5427901575793967, "grad_norm": 1.655280351638794, "learning_rate": 4.550721022523404e-06, "loss": 0.8132, "step": 15367 }, { "epoch": 0.5428254793831047, "grad_norm": 1.7415143251419067, "learning_rate": 4.550151329488682e-06, "loss": 0.8062, "step": 15368 }, { "epoch": 0.5428608011868126, "grad_norm": 1.6346603631973267, "learning_rate": 4.549581642341494e-06, "loss": 0.8191, "step": 15369 }, { "epoch": 0.5428961229905205, "grad_norm": 1.7567094564437866, "learning_rate": 4.5490119610892956e-06, "loss": 0.7931, "step": 15370 }, { "epoch": 0.5429314447942284, "grad_norm": 1.8609575033187866, "learning_rate": 4.548442285739546e-06, "loss": 0.8327, "step": 15371 }, { "epoch": 0.5429667665979363, "grad_norm": 1.6966112852096558, "learning_rate": 4.547872616299698e-06, "loss": 0.8262, "step": 15372 }, { "epoch": 0.5430020884016442, "grad_norm": 1.584346890449524, "learning_rate": 4.547302952777207e-06, "loss": 0.7835, "step": 15373 }, { "epoch": 0.5430374102053521, "grad_norm": 1.5762457847595215, "learning_rate": 4.546733295179531e-06, "loss": 0.7832, "step": 15374 }, { "epoch": 0.54307273200906, "grad_norm": 1.5981401205062866, "learning_rate": 4.546163643514126e-06, "loss": 0.7592, "step": 15375 }, { "epoch": 0.5431080538127679, "grad_norm": 1.763370394706726, "learning_rate": 4.545593997788445e-06, "loss": 0.809, "step": 15376 }, { "epoch": 0.5431433756164759, "grad_norm": 1.7993782758712769, "learning_rate": 4.545024358009943e-06, "loss": 0.8017, "step": 15377 }, { "epoch": 0.5431786974201838, "grad_norm": 1.5288705825805664, "learning_rate": 4.544454724186077e-06, "loss": 0.7944, "step": 15378 }, { "epoch": 0.5432140192238917, "grad_norm": 1.6874028444290161, "learning_rate": 4.543885096324303e-06, "loss": 0.7777, "step": 15379 }, { "epoch": 0.5432493410275996, "grad_norm": 1.5266584157943726, "learning_rate": 4.543315474432073e-06, "loss": 0.7712, "step": 15380 }, { "epoch": 0.5432846628313075, "grad_norm": 1.539085865020752, "learning_rate": 4.542745858516846e-06, "loss": 0.7957, "step": 15381 }, { "epoch": 0.5433199846350154, "grad_norm": 1.6419084072113037, "learning_rate": 4.542176248586075e-06, "loss": 0.7763, "step": 15382 }, { "epoch": 0.5433553064387233, "grad_norm": 1.5782265663146973, "learning_rate": 4.541606644647214e-06, "loss": 0.7967, "step": 15383 }, { "epoch": 0.5433906282424312, "grad_norm": 1.6799848079681396, "learning_rate": 4.541037046707721e-06, "loss": 0.7923, "step": 15384 }, { "epoch": 0.5434259500461391, "grad_norm": 1.6027414798736572, "learning_rate": 4.5404674547750486e-06, "loss": 0.8081, "step": 15385 }, { "epoch": 0.543461271849847, "grad_norm": 1.7151784896850586, "learning_rate": 4.53989786885665e-06, "loss": 0.7868, "step": 15386 }, { "epoch": 0.543496593653555, "grad_norm": 1.6107879877090454, "learning_rate": 4.539328288959982e-06, "loss": 0.7959, "step": 15387 }, { "epoch": 0.5435319154572629, "grad_norm": 1.553191900253296, "learning_rate": 4.5387587150925e-06, "loss": 0.7846, "step": 15388 }, { "epoch": 0.5435672372609708, "grad_norm": 1.9421813488006592, "learning_rate": 4.538189147261657e-06, "loss": 0.827, "step": 15389 }, { "epoch": 0.5436025590646787, "grad_norm": 2.0020265579223633, "learning_rate": 4.537619585474907e-06, "loss": 0.7789, "step": 15390 }, { "epoch": 0.5436378808683865, "grad_norm": 1.610178828239441, "learning_rate": 4.537050029739705e-06, "loss": 0.7885, "step": 15391 }, { "epoch": 0.5436732026720944, "grad_norm": 1.557100772857666, "learning_rate": 4.5364804800635055e-06, "loss": 0.7849, "step": 15392 }, { "epoch": 0.5437085244758023, "grad_norm": 1.5990201234817505, "learning_rate": 4.535910936453762e-06, "loss": 0.7878, "step": 15393 }, { "epoch": 0.5437438462795102, "grad_norm": 1.675363540649414, "learning_rate": 4.535341398917931e-06, "loss": 0.8294, "step": 15394 }, { "epoch": 0.5437791680832181, "grad_norm": 1.5591763257980347, "learning_rate": 4.534771867463463e-06, "loss": 0.7568, "step": 15395 }, { "epoch": 0.543814489886926, "grad_norm": 1.878798246383667, "learning_rate": 4.534202342097813e-06, "loss": 0.8011, "step": 15396 }, { "epoch": 0.543849811690634, "grad_norm": 1.5278129577636719, "learning_rate": 4.533632822828435e-06, "loss": 0.7897, "step": 15397 }, { "epoch": 0.5438851334943419, "grad_norm": 1.9453637599945068, "learning_rate": 4.5330633096627834e-06, "loss": 0.8295, "step": 15398 }, { "epoch": 0.5439204552980498, "grad_norm": 1.6737688779830933, "learning_rate": 4.532493802608311e-06, "loss": 0.7753, "step": 15399 }, { "epoch": 0.5439557771017577, "grad_norm": 1.6948118209838867, "learning_rate": 4.531924301672472e-06, "loss": 0.758, "step": 15400 }, { "epoch": 0.5439910989054656, "grad_norm": 2.2211596965789795, "learning_rate": 4.531354806862719e-06, "loss": 0.8047, "step": 15401 }, { "epoch": 0.5440264207091735, "grad_norm": 1.5672991275787354, "learning_rate": 4.530785318186507e-06, "loss": 0.7876, "step": 15402 }, { "epoch": 0.5440617425128814, "grad_norm": 1.6703109741210938, "learning_rate": 4.530215835651289e-06, "loss": 0.8098, "step": 15403 }, { "epoch": 0.5440970643165893, "grad_norm": 1.6942574977874756, "learning_rate": 4.5296463592645165e-06, "loss": 0.7964, "step": 15404 }, { "epoch": 0.5441323861202972, "grad_norm": 1.6338974237442017, "learning_rate": 4.529076889033645e-06, "loss": 0.8055, "step": 15405 }, { "epoch": 0.5441677079240052, "grad_norm": 2.260568857192993, "learning_rate": 4.528507424966126e-06, "loss": 0.7913, "step": 15406 }, { "epoch": 0.5442030297277131, "grad_norm": 1.726474642753601, "learning_rate": 4.527937967069413e-06, "loss": 0.8081, "step": 15407 }, { "epoch": 0.544238351531421, "grad_norm": 1.6654484272003174, "learning_rate": 4.527368515350961e-06, "loss": 0.7997, "step": 15408 }, { "epoch": 0.5442736733351289, "grad_norm": 1.6407413482666016, "learning_rate": 4.52679906981822e-06, "loss": 0.7676, "step": 15409 }, { "epoch": 0.5443089951388368, "grad_norm": 1.6431740522384644, "learning_rate": 4.526229630478643e-06, "loss": 0.8241, "step": 15410 }, { "epoch": 0.5443443169425447, "grad_norm": 1.8319579362869263, "learning_rate": 4.525660197339685e-06, "loss": 0.8046, "step": 15411 }, { "epoch": 0.5443796387462526, "grad_norm": 1.8085639476776123, "learning_rate": 4.525090770408797e-06, "loss": 0.7784, "step": 15412 }, { "epoch": 0.5444149605499605, "grad_norm": 1.6018575429916382, "learning_rate": 4.524521349693432e-06, "loss": 0.7977, "step": 15413 }, { "epoch": 0.5444502823536684, "grad_norm": 1.537399411201477, "learning_rate": 4.52395193520104e-06, "loss": 0.7628, "step": 15414 }, { "epoch": 0.5444856041573763, "grad_norm": 2.350374937057495, "learning_rate": 4.523382526939078e-06, "loss": 0.7895, "step": 15415 }, { "epoch": 0.5445209259610843, "grad_norm": 1.747219443321228, "learning_rate": 4.5228131249149945e-06, "loss": 0.7907, "step": 15416 }, { "epoch": 0.5445562477647921, "grad_norm": 1.5784742832183838, "learning_rate": 4.522243729136243e-06, "loss": 0.771, "step": 15417 }, { "epoch": 0.5445915695685, "grad_norm": 1.5962028503417969, "learning_rate": 4.521674339610277e-06, "loss": 0.7867, "step": 15418 }, { "epoch": 0.5446268913722079, "grad_norm": 1.665541172027588, "learning_rate": 4.5211049563445455e-06, "loss": 0.8013, "step": 15419 }, { "epoch": 0.5446622131759158, "grad_norm": 1.617447853088379, "learning_rate": 4.520535579346503e-06, "loss": 0.7883, "step": 15420 }, { "epoch": 0.5446975349796237, "grad_norm": 1.860784649848938, "learning_rate": 4.519966208623601e-06, "loss": 0.7797, "step": 15421 }, { "epoch": 0.5447328567833316, "grad_norm": 1.6718509197235107, "learning_rate": 4.519396844183292e-06, "loss": 0.7716, "step": 15422 }, { "epoch": 0.5447681785870395, "grad_norm": 1.810158371925354, "learning_rate": 4.518827486033025e-06, "loss": 0.8375, "step": 15423 }, { "epoch": 0.5448035003907474, "grad_norm": 1.773851990699768, "learning_rate": 4.518258134180254e-06, "loss": 0.8166, "step": 15424 }, { "epoch": 0.5448388221944553, "grad_norm": 1.7991405725479126, "learning_rate": 4.51768878863243e-06, "loss": 0.7561, "step": 15425 }, { "epoch": 0.5448741439981633, "grad_norm": 1.4317686557769775, "learning_rate": 4.517119449397005e-06, "loss": 0.7165, "step": 15426 }, { "epoch": 0.5449094658018712, "grad_norm": 1.675003170967102, "learning_rate": 4.51655011648143e-06, "loss": 0.8038, "step": 15427 }, { "epoch": 0.5449447876055791, "grad_norm": 1.6336439847946167, "learning_rate": 4.515980789893155e-06, "loss": 0.8001, "step": 15428 }, { "epoch": 0.544980109409287, "grad_norm": 1.6862198114395142, "learning_rate": 4.5154114696396335e-06, "loss": 0.7917, "step": 15429 }, { "epoch": 0.5450154312129949, "grad_norm": 1.7410699129104614, "learning_rate": 4.514842155728316e-06, "loss": 0.7784, "step": 15430 }, { "epoch": 0.5450507530167028, "grad_norm": 1.5142900943756104, "learning_rate": 4.514272848166653e-06, "loss": 0.771, "step": 15431 }, { "epoch": 0.5450860748204107, "grad_norm": 2.0990428924560547, "learning_rate": 4.513703546962095e-06, "loss": 0.8011, "step": 15432 }, { "epoch": 0.5451213966241186, "grad_norm": 1.8058959245681763, "learning_rate": 4.513134252122093e-06, "loss": 0.8174, "step": 15433 }, { "epoch": 0.5451567184278265, "grad_norm": 2.0222792625427246, "learning_rate": 4.5125649636541e-06, "loss": 0.7581, "step": 15434 }, { "epoch": 0.5451920402315344, "grad_norm": 1.5712833404541016, "learning_rate": 4.511995681565563e-06, "loss": 0.7974, "step": 15435 }, { "epoch": 0.5452273620352424, "grad_norm": 1.7641117572784424, "learning_rate": 4.511426405863936e-06, "loss": 0.7877, "step": 15436 }, { "epoch": 0.5452626838389503, "grad_norm": 1.62274169921875, "learning_rate": 4.510857136556669e-06, "loss": 0.7908, "step": 15437 }, { "epoch": 0.5452980056426582, "grad_norm": 1.687286138534546, "learning_rate": 4.510287873651211e-06, "loss": 0.78, "step": 15438 }, { "epoch": 0.5453333274463661, "grad_norm": 1.6259186267852783, "learning_rate": 4.509718617155013e-06, "loss": 0.741, "step": 15439 }, { "epoch": 0.545368649250074, "grad_norm": 1.4869898557662964, "learning_rate": 4.509149367075526e-06, "loss": 0.7827, "step": 15440 }, { "epoch": 0.5454039710537819, "grad_norm": 1.816654086112976, "learning_rate": 4.5085801234202e-06, "loss": 0.7857, "step": 15441 }, { "epoch": 0.5454392928574898, "grad_norm": 1.76682710647583, "learning_rate": 4.508010886196484e-06, "loss": 0.8063, "step": 15442 }, { "epoch": 0.5454746146611976, "grad_norm": 1.7059705257415771, "learning_rate": 4.50744165541183e-06, "loss": 0.8026, "step": 15443 }, { "epoch": 0.5455099364649055, "grad_norm": 1.562854528427124, "learning_rate": 4.506872431073686e-06, "loss": 0.7767, "step": 15444 }, { "epoch": 0.5455452582686134, "grad_norm": 1.6964383125305176, "learning_rate": 4.506303213189505e-06, "loss": 0.7921, "step": 15445 }, { "epoch": 0.5455805800723214, "grad_norm": 1.552891731262207, "learning_rate": 4.505734001766734e-06, "loss": 0.7779, "step": 15446 }, { "epoch": 0.5456159018760293, "grad_norm": 2.084867477416992, "learning_rate": 4.505164796812822e-06, "loss": 0.7934, "step": 15447 }, { "epoch": 0.5456512236797372, "grad_norm": 1.7375742197036743, "learning_rate": 4.504595598335221e-06, "loss": 0.8085, "step": 15448 }, { "epoch": 0.5456865454834451, "grad_norm": 1.5940738916397095, "learning_rate": 4.504026406341382e-06, "loss": 0.8151, "step": 15449 }, { "epoch": 0.545721867287153, "grad_norm": 1.6374694108963013, "learning_rate": 4.50345722083875e-06, "loss": 0.7751, "step": 15450 }, { "epoch": 0.5457571890908609, "grad_norm": 1.747121810913086, "learning_rate": 4.502888041834775e-06, "loss": 0.8113, "step": 15451 }, { "epoch": 0.5457925108945688, "grad_norm": 1.6305142641067505, "learning_rate": 4.5023188693369105e-06, "loss": 0.7772, "step": 15452 }, { "epoch": 0.5458278326982767, "grad_norm": 1.6907446384429932, "learning_rate": 4.501749703352602e-06, "loss": 0.7568, "step": 15453 }, { "epoch": 0.5458631545019846, "grad_norm": 1.6200278997421265, "learning_rate": 4.501180543889299e-06, "loss": 0.7899, "step": 15454 }, { "epoch": 0.5458984763056925, "grad_norm": 1.5603816509246826, "learning_rate": 4.500611390954453e-06, "loss": 0.7491, "step": 15455 }, { "epoch": 0.5459337981094005, "grad_norm": 1.654897689819336, "learning_rate": 4.50004224455551e-06, "loss": 0.7644, "step": 15456 }, { "epoch": 0.5459691199131084, "grad_norm": 1.6944060325622559, "learning_rate": 4.499473104699919e-06, "loss": 0.8192, "step": 15457 }, { "epoch": 0.5460044417168163, "grad_norm": 1.6587467193603516, "learning_rate": 4.498903971395132e-06, "loss": 0.7765, "step": 15458 }, { "epoch": 0.5460397635205242, "grad_norm": 1.507227897644043, "learning_rate": 4.498334844648594e-06, "loss": 0.7931, "step": 15459 }, { "epoch": 0.5460750853242321, "grad_norm": 1.5739046335220337, "learning_rate": 4.497765724467756e-06, "loss": 0.7831, "step": 15460 }, { "epoch": 0.54611040712794, "grad_norm": 1.833764910697937, "learning_rate": 4.497196610860066e-06, "loss": 0.8199, "step": 15461 }, { "epoch": 0.5461457289316479, "grad_norm": 1.5103322267532349, "learning_rate": 4.496627503832971e-06, "loss": 0.8158, "step": 15462 }, { "epoch": 0.5461810507353558, "grad_norm": 1.6923233270645142, "learning_rate": 4.496058403393923e-06, "loss": 0.7946, "step": 15463 }, { "epoch": 0.5462163725390637, "grad_norm": 1.5320451259613037, "learning_rate": 4.495489309550367e-06, "loss": 0.7934, "step": 15464 }, { "epoch": 0.5462516943427717, "grad_norm": 1.700791835784912, "learning_rate": 4.4949202223097504e-06, "loss": 0.8001, "step": 15465 }, { "epoch": 0.5462870161464796, "grad_norm": 1.6456061601638794, "learning_rate": 4.494351141679525e-06, "loss": 0.7612, "step": 15466 }, { "epoch": 0.5463223379501875, "grad_norm": 1.7281543016433716, "learning_rate": 4.493782067667137e-06, "loss": 0.7861, "step": 15467 }, { "epoch": 0.5463576597538954, "grad_norm": 1.8674719333648682, "learning_rate": 4.493213000280034e-06, "loss": 0.7841, "step": 15468 }, { "epoch": 0.5463929815576032, "grad_norm": 1.6616281270980835, "learning_rate": 4.492643939525662e-06, "loss": 0.7884, "step": 15469 }, { "epoch": 0.5464283033613111, "grad_norm": 1.4669517278671265, "learning_rate": 4.4920748854114725e-06, "loss": 0.7616, "step": 15470 }, { "epoch": 0.546463625165019, "grad_norm": 2.8312458992004395, "learning_rate": 4.491505837944911e-06, "loss": 0.7932, "step": 15471 }, { "epoch": 0.5464989469687269, "grad_norm": 1.710194706916809, "learning_rate": 4.490936797133425e-06, "loss": 0.7985, "step": 15472 }, { "epoch": 0.5465342687724348, "grad_norm": 1.802335262298584, "learning_rate": 4.490367762984463e-06, "loss": 0.7762, "step": 15473 }, { "epoch": 0.5465695905761427, "grad_norm": 1.7001442909240723, "learning_rate": 4.489798735505472e-06, "loss": 0.8352, "step": 15474 }, { "epoch": 0.5466049123798506, "grad_norm": 1.6612917184829712, "learning_rate": 4.489229714703899e-06, "loss": 0.8019, "step": 15475 }, { "epoch": 0.5466402341835586, "grad_norm": 1.6986405849456787, "learning_rate": 4.488660700587191e-06, "loss": 0.7826, "step": 15476 }, { "epoch": 0.5466755559872665, "grad_norm": 3.627206563949585, "learning_rate": 4.488091693162797e-06, "loss": 0.7973, "step": 15477 }, { "epoch": 0.5467108777909744, "grad_norm": 1.7282099723815918, "learning_rate": 4.487522692438161e-06, "loss": 0.7718, "step": 15478 }, { "epoch": 0.5467461995946823, "grad_norm": 2.104060173034668, "learning_rate": 4.486953698420733e-06, "loss": 0.7853, "step": 15479 }, { "epoch": 0.5467815213983902, "grad_norm": 1.7278058528900146, "learning_rate": 4.486384711117958e-06, "loss": 0.7778, "step": 15480 }, { "epoch": 0.5468168432020981, "grad_norm": 1.4559792280197144, "learning_rate": 4.485815730537283e-06, "loss": 0.7891, "step": 15481 }, { "epoch": 0.546852165005806, "grad_norm": 1.7080284357070923, "learning_rate": 4.485246756686156e-06, "loss": 0.7998, "step": 15482 }, { "epoch": 0.5468874868095139, "grad_norm": 1.6655855178833008, "learning_rate": 4.484677789572021e-06, "loss": 0.802, "step": 15483 }, { "epoch": 0.5469228086132218, "grad_norm": 1.6558219194412231, "learning_rate": 4.484108829202328e-06, "loss": 0.8034, "step": 15484 }, { "epoch": 0.5469581304169298, "grad_norm": 1.889281988143921, "learning_rate": 4.483539875584521e-06, "loss": 0.7794, "step": 15485 }, { "epoch": 0.5469934522206377, "grad_norm": 1.572346806526184, "learning_rate": 4.482970928726048e-06, "loss": 0.7879, "step": 15486 }, { "epoch": 0.5470287740243456, "grad_norm": 1.6692733764648438, "learning_rate": 4.4824019886343545e-06, "loss": 0.7791, "step": 15487 }, { "epoch": 0.5470640958280535, "grad_norm": 1.656884789466858, "learning_rate": 4.4818330553168845e-06, "loss": 0.7424, "step": 15488 }, { "epoch": 0.5470994176317614, "grad_norm": 1.5337989330291748, "learning_rate": 4.481264128781087e-06, "loss": 0.7574, "step": 15489 }, { "epoch": 0.5471347394354693, "grad_norm": 1.765575885772705, "learning_rate": 4.480695209034407e-06, "loss": 0.7834, "step": 15490 }, { "epoch": 0.5471700612391772, "grad_norm": 1.7559691667556763, "learning_rate": 4.48012629608429e-06, "loss": 0.8138, "step": 15491 }, { "epoch": 0.5472053830428851, "grad_norm": 1.5222082138061523, "learning_rate": 4.479557389938184e-06, "loss": 0.7877, "step": 15492 }, { "epoch": 0.547240704846593, "grad_norm": 1.7433127164840698, "learning_rate": 4.478988490603531e-06, "loss": 0.8012, "step": 15493 }, { "epoch": 0.547276026650301, "grad_norm": 1.870301604270935, "learning_rate": 4.478419598087779e-06, "loss": 0.7852, "step": 15494 }, { "epoch": 0.5473113484540088, "grad_norm": 2.069077491760254, "learning_rate": 4.477850712398375e-06, "loss": 0.7935, "step": 15495 }, { "epoch": 0.5473466702577167, "grad_norm": 1.7960536479949951, "learning_rate": 4.47728183354276e-06, "loss": 0.777, "step": 15496 }, { "epoch": 0.5473819920614246, "grad_norm": 1.6061623096466064, "learning_rate": 4.476712961528384e-06, "loss": 0.7957, "step": 15497 }, { "epoch": 0.5474173138651325, "grad_norm": 1.9843090772628784, "learning_rate": 4.47614409636269e-06, "loss": 0.8031, "step": 15498 }, { "epoch": 0.5474526356688404, "grad_norm": 1.811180591583252, "learning_rate": 4.4755752380531235e-06, "loss": 0.7483, "step": 15499 }, { "epoch": 0.5474879574725483, "grad_norm": 1.7855125665664673, "learning_rate": 4.47500638660713e-06, "loss": 0.804, "step": 15500 }, { "epoch": 0.5475232792762562, "grad_norm": 1.664843201637268, "learning_rate": 4.474437542032154e-06, "loss": 0.7952, "step": 15501 }, { "epoch": 0.5475586010799641, "grad_norm": 2.218841075897217, "learning_rate": 4.47386870433564e-06, "loss": 0.7852, "step": 15502 }, { "epoch": 0.547593922883672, "grad_norm": 1.6302207708358765, "learning_rate": 4.473299873525034e-06, "loss": 0.7571, "step": 15503 }, { "epoch": 0.54762924468738, "grad_norm": 1.6024014949798584, "learning_rate": 4.472731049607782e-06, "loss": 0.7952, "step": 15504 }, { "epoch": 0.5476645664910879, "grad_norm": 0.8941923975944519, "learning_rate": 4.472162232591326e-06, "loss": 0.5992, "step": 15505 }, { "epoch": 0.5476998882947958, "grad_norm": 1.9086064100265503, "learning_rate": 4.47159342248311e-06, "loss": 0.7943, "step": 15506 }, { "epoch": 0.5477352100985037, "grad_norm": 1.59799063205719, "learning_rate": 4.471024619290581e-06, "loss": 0.7638, "step": 15507 }, { "epoch": 0.5477705319022116, "grad_norm": 1.704487919807434, "learning_rate": 4.470455823021183e-06, "loss": 0.7793, "step": 15508 }, { "epoch": 0.5478058537059195, "grad_norm": 1.825451374053955, "learning_rate": 4.469887033682357e-06, "loss": 0.7782, "step": 15509 }, { "epoch": 0.5478411755096274, "grad_norm": 1.8063372373580933, "learning_rate": 4.469318251281553e-06, "loss": 0.822, "step": 15510 }, { "epoch": 0.5478764973133353, "grad_norm": 1.4746179580688477, "learning_rate": 4.4687494758262104e-06, "loss": 0.7847, "step": 15511 }, { "epoch": 0.5479118191170432, "grad_norm": 1.9463940858840942, "learning_rate": 4.4681807073237735e-06, "loss": 0.7934, "step": 15512 }, { "epoch": 0.5479471409207511, "grad_norm": 1.5841484069824219, "learning_rate": 4.467611945781689e-06, "loss": 0.7707, "step": 15513 }, { "epoch": 0.547982462724459, "grad_norm": 1.6480178833007812, "learning_rate": 4.4670431912074005e-06, "loss": 0.7966, "step": 15514 }, { "epoch": 0.548017784528167, "grad_norm": 1.726050853729248, "learning_rate": 4.4664744436083485e-06, "loss": 0.7667, "step": 15515 }, { "epoch": 0.5480531063318749, "grad_norm": 1.483079433441162, "learning_rate": 4.465905702991979e-06, "loss": 0.8055, "step": 15516 }, { "epoch": 0.5480884281355828, "grad_norm": 1.7714165449142456, "learning_rate": 4.465336969365735e-06, "loss": 0.7998, "step": 15517 }, { "epoch": 0.5481237499392907, "grad_norm": 1.864013671875, "learning_rate": 4.464768242737062e-06, "loss": 0.7864, "step": 15518 }, { "epoch": 0.5481590717429986, "grad_norm": 0.9772237539291382, "learning_rate": 4.464199523113401e-06, "loss": 0.5765, "step": 15519 }, { "epoch": 0.5481943935467065, "grad_norm": 1.7174540758132935, "learning_rate": 4.4636308105021945e-06, "loss": 0.8114, "step": 15520 }, { "epoch": 0.5482297153504143, "grad_norm": 1.9287097454071045, "learning_rate": 4.4630621049108885e-06, "loss": 0.7474, "step": 15521 }, { "epoch": 0.5482650371541222, "grad_norm": 1.4756542444229126, "learning_rate": 4.462493406346925e-06, "loss": 0.7624, "step": 15522 }, { "epoch": 0.5483003589578301, "grad_norm": 1.6691596508026123, "learning_rate": 4.461924714817748e-06, "loss": 0.7827, "step": 15523 }, { "epoch": 0.548335680761538, "grad_norm": 1.6466273069381714, "learning_rate": 4.461356030330797e-06, "loss": 0.7811, "step": 15524 }, { "epoch": 0.548371002565246, "grad_norm": 2.1139750480651855, "learning_rate": 4.460787352893517e-06, "loss": 0.7846, "step": 15525 }, { "epoch": 0.5484063243689539, "grad_norm": 1.7051544189453125, "learning_rate": 4.460218682513352e-06, "loss": 0.788, "step": 15526 }, { "epoch": 0.5484416461726618, "grad_norm": 1.7366925477981567, "learning_rate": 4.459650019197741e-06, "loss": 0.7889, "step": 15527 }, { "epoch": 0.5484769679763697, "grad_norm": 1.6636592149734497, "learning_rate": 4.459081362954131e-06, "loss": 0.8113, "step": 15528 }, { "epoch": 0.5485122897800776, "grad_norm": 1.753047227859497, "learning_rate": 4.458512713789962e-06, "loss": 0.7448, "step": 15529 }, { "epoch": 0.5485476115837855, "grad_norm": 1.626726746559143, "learning_rate": 4.457944071712676e-06, "loss": 0.7715, "step": 15530 }, { "epoch": 0.5485829333874934, "grad_norm": 1.6566896438598633, "learning_rate": 4.457375436729717e-06, "loss": 0.7867, "step": 15531 }, { "epoch": 0.5486182551912013, "grad_norm": 1.7064411640167236, "learning_rate": 4.456806808848527e-06, "loss": 0.7991, "step": 15532 }, { "epoch": 0.5486535769949092, "grad_norm": 1.683837890625, "learning_rate": 4.456238188076545e-06, "loss": 0.7851, "step": 15533 }, { "epoch": 0.5486888987986172, "grad_norm": 1.825637936592102, "learning_rate": 4.455669574421217e-06, "loss": 0.8101, "step": 15534 }, { "epoch": 0.5487242206023251, "grad_norm": 1.607911467552185, "learning_rate": 4.455100967889984e-06, "loss": 0.7718, "step": 15535 }, { "epoch": 0.548759542406033, "grad_norm": 1.5660635232925415, "learning_rate": 4.4545323684902855e-06, "loss": 0.8099, "step": 15536 }, { "epoch": 0.5487948642097409, "grad_norm": 1.8128780126571655, "learning_rate": 4.453963776229566e-06, "loss": 0.8085, "step": 15537 }, { "epoch": 0.5488301860134488, "grad_norm": 1.7235240936279297, "learning_rate": 4.453395191115266e-06, "loss": 0.7908, "step": 15538 }, { "epoch": 0.5488655078171567, "grad_norm": 1.9208338260650635, "learning_rate": 4.452826613154826e-06, "loss": 0.8288, "step": 15539 }, { "epoch": 0.5489008296208646, "grad_norm": 1.5871080160140991, "learning_rate": 4.452258042355689e-06, "loss": 0.7696, "step": 15540 }, { "epoch": 0.5489361514245725, "grad_norm": 1.6452710628509521, "learning_rate": 4.451689478725298e-06, "loss": 0.7983, "step": 15541 }, { "epoch": 0.5489714732282804, "grad_norm": 1.5523990392684937, "learning_rate": 4.451120922271089e-06, "loss": 0.7407, "step": 15542 }, { "epoch": 0.5490067950319883, "grad_norm": 1.9454667568206787, "learning_rate": 4.450552373000507e-06, "loss": 0.8146, "step": 15543 }, { "epoch": 0.5490421168356963, "grad_norm": 1.6569114923477173, "learning_rate": 4.449983830920992e-06, "loss": 0.7971, "step": 15544 }, { "epoch": 0.5490774386394042, "grad_norm": 1.7830708026885986, "learning_rate": 4.4494152960399864e-06, "loss": 0.7997, "step": 15545 }, { "epoch": 0.5491127604431121, "grad_norm": 1.7723201513290405, "learning_rate": 4.448846768364928e-06, "loss": 0.7687, "step": 15546 }, { "epoch": 0.5491480822468199, "grad_norm": 2.0598697662353516, "learning_rate": 4.448278247903261e-06, "loss": 0.7832, "step": 15547 }, { "epoch": 0.5491834040505278, "grad_norm": 1.585026741027832, "learning_rate": 4.447709734662424e-06, "loss": 0.7975, "step": 15548 }, { "epoch": 0.5492187258542357, "grad_norm": 1.6083080768585205, "learning_rate": 4.4471412286498574e-06, "loss": 0.7842, "step": 15549 }, { "epoch": 0.5492540476579436, "grad_norm": 1.7063815593719482, "learning_rate": 4.446572729873004e-06, "loss": 0.7977, "step": 15550 }, { "epoch": 0.5492893694616515, "grad_norm": 1.7716834545135498, "learning_rate": 4.446004238339301e-06, "loss": 0.8095, "step": 15551 }, { "epoch": 0.5493246912653594, "grad_norm": 1.7132036685943604, "learning_rate": 4.445435754056192e-06, "loss": 0.8088, "step": 15552 }, { "epoch": 0.5493600130690673, "grad_norm": 1.5784595012664795, "learning_rate": 4.444867277031115e-06, "loss": 0.7907, "step": 15553 }, { "epoch": 0.5493953348727753, "grad_norm": 1.5759327411651611, "learning_rate": 4.44429880727151e-06, "loss": 0.7974, "step": 15554 }, { "epoch": 0.5494306566764832, "grad_norm": 1.7145934104919434, "learning_rate": 4.443730344784819e-06, "loss": 0.8127, "step": 15555 }, { "epoch": 0.5494659784801911, "grad_norm": 1.729109287261963, "learning_rate": 4.44316188957848e-06, "loss": 0.7725, "step": 15556 }, { "epoch": 0.549501300283899, "grad_norm": 1.6949536800384521, "learning_rate": 4.442593441659933e-06, "loss": 0.8026, "step": 15557 }, { "epoch": 0.5495366220876069, "grad_norm": 1.7014249563217163, "learning_rate": 4.442025001036619e-06, "loss": 0.7789, "step": 15558 }, { "epoch": 0.5495719438913148, "grad_norm": 1.7320200204849243, "learning_rate": 4.441456567715977e-06, "loss": 0.7678, "step": 15559 }, { "epoch": 0.5496072656950227, "grad_norm": 1.656125783920288, "learning_rate": 4.440888141705448e-06, "loss": 0.8006, "step": 15560 }, { "epoch": 0.5496425874987306, "grad_norm": 1.6488889455795288, "learning_rate": 4.440319723012468e-06, "loss": 0.7883, "step": 15561 }, { "epoch": 0.5496779093024385, "grad_norm": 1.5177348852157593, "learning_rate": 4.439751311644478e-06, "loss": 0.7653, "step": 15562 }, { "epoch": 0.5497132311061464, "grad_norm": 1.7111769914627075, "learning_rate": 4.439182907608918e-06, "loss": 0.797, "step": 15563 }, { "epoch": 0.5497485529098544, "grad_norm": 1.690036654472351, "learning_rate": 4.4386145109132265e-06, "loss": 0.8046, "step": 15564 }, { "epoch": 0.5497838747135623, "grad_norm": 1.5737992525100708, "learning_rate": 4.438046121564843e-06, "loss": 0.7582, "step": 15565 }, { "epoch": 0.5498191965172702, "grad_norm": 1.7083752155303955, "learning_rate": 4.437477739571207e-06, "loss": 0.7899, "step": 15566 }, { "epoch": 0.5498545183209781, "grad_norm": 1.7660763263702393, "learning_rate": 4.436909364939755e-06, "loss": 0.8129, "step": 15567 }, { "epoch": 0.549889840124686, "grad_norm": 2.229905366897583, "learning_rate": 4.436340997677928e-06, "loss": 0.7663, "step": 15568 }, { "epoch": 0.5499251619283939, "grad_norm": 1.5730997323989868, "learning_rate": 4.435772637793164e-06, "loss": 0.7814, "step": 15569 }, { "epoch": 0.5499604837321018, "grad_norm": 1.600856900215149, "learning_rate": 4.435204285292901e-06, "loss": 0.7447, "step": 15570 }, { "epoch": 0.5499958055358097, "grad_norm": 1.7027535438537598, "learning_rate": 4.43463594018458e-06, "loss": 0.7824, "step": 15571 }, { "epoch": 0.5500311273395176, "grad_norm": 1.59490966796875, "learning_rate": 4.4340676024756366e-06, "loss": 0.794, "step": 15572 }, { "epoch": 0.5500664491432254, "grad_norm": 1.7395868301391602, "learning_rate": 4.433499272173509e-06, "loss": 0.8027, "step": 15573 }, { "epoch": 0.5501017709469334, "grad_norm": 1.6927591562271118, "learning_rate": 4.432930949285637e-06, "loss": 0.7813, "step": 15574 }, { "epoch": 0.5501370927506413, "grad_norm": 1.6414867639541626, "learning_rate": 4.432362633819458e-06, "loss": 0.823, "step": 15575 }, { "epoch": 0.5501724145543492, "grad_norm": 1.6549941301345825, "learning_rate": 4.431794325782411e-06, "loss": 0.7886, "step": 15576 }, { "epoch": 0.5502077363580571, "grad_norm": 1.6241919994354248, "learning_rate": 4.431226025181932e-06, "loss": 0.8171, "step": 15577 }, { "epoch": 0.550243058161765, "grad_norm": 1.8385311365127563, "learning_rate": 4.430657732025463e-06, "loss": 0.8221, "step": 15578 }, { "epoch": 0.5502783799654729, "grad_norm": 1.7186672687530518, "learning_rate": 4.430089446320436e-06, "loss": 0.823, "step": 15579 }, { "epoch": 0.5503137017691808, "grad_norm": 2.474980354309082, "learning_rate": 4.429521168074291e-06, "loss": 0.8004, "step": 15580 }, { "epoch": 0.5503490235728887, "grad_norm": 1.7010993957519531, "learning_rate": 4.428952897294466e-06, "loss": 0.7962, "step": 15581 }, { "epoch": 0.5503843453765966, "grad_norm": 1.6376961469650269, "learning_rate": 4.428384633988399e-06, "loss": 0.7581, "step": 15582 }, { "epoch": 0.5504196671803046, "grad_norm": 1.6349847316741943, "learning_rate": 4.427816378163525e-06, "loss": 0.7792, "step": 15583 }, { "epoch": 0.5504549889840125, "grad_norm": 0.9646049737930298, "learning_rate": 4.427248129827283e-06, "loss": 0.5768, "step": 15584 }, { "epoch": 0.5504903107877204, "grad_norm": 1.6457147598266602, "learning_rate": 4.4266798889871095e-06, "loss": 0.7849, "step": 15585 }, { "epoch": 0.5505256325914283, "grad_norm": 1.6002271175384521, "learning_rate": 4.426111655650443e-06, "loss": 0.815, "step": 15586 }, { "epoch": 0.5505609543951362, "grad_norm": 1.543544054031372, "learning_rate": 4.425543429824719e-06, "loss": 0.8033, "step": 15587 }, { "epoch": 0.5505962761988441, "grad_norm": 1.4551876783370972, "learning_rate": 4.424975211517374e-06, "loss": 0.75, "step": 15588 }, { "epoch": 0.550631598002552, "grad_norm": 1.586391568183899, "learning_rate": 4.424407000735845e-06, "loss": 0.7662, "step": 15589 }, { "epoch": 0.5506669198062599, "grad_norm": 1.6167094707489014, "learning_rate": 4.42383879748757e-06, "loss": 0.7983, "step": 15590 }, { "epoch": 0.5507022416099678, "grad_norm": 1.8779901266098022, "learning_rate": 4.4232706017799836e-06, "loss": 0.805, "step": 15591 }, { "epoch": 0.5507375634136757, "grad_norm": 1.5733364820480347, "learning_rate": 4.422702413620525e-06, "loss": 0.7727, "step": 15592 }, { "epoch": 0.5507728852173837, "grad_norm": 1.6442739963531494, "learning_rate": 4.422134233016627e-06, "loss": 0.8032, "step": 15593 }, { "epoch": 0.5508082070210916, "grad_norm": 1.5679771900177002, "learning_rate": 4.421566059975727e-06, "loss": 0.8126, "step": 15594 }, { "epoch": 0.5508435288247995, "grad_norm": 1.5527803897857666, "learning_rate": 4.420997894505263e-06, "loss": 0.8085, "step": 15595 }, { "epoch": 0.5508788506285074, "grad_norm": 1.669053316116333, "learning_rate": 4.4204297366126695e-06, "loss": 0.7972, "step": 15596 }, { "epoch": 0.5509141724322153, "grad_norm": 1.8374494314193726, "learning_rate": 4.419861586305384e-06, "loss": 0.7914, "step": 15597 }, { "epoch": 0.5509494942359232, "grad_norm": 1.7393122911453247, "learning_rate": 4.419293443590839e-06, "loss": 0.7602, "step": 15598 }, { "epoch": 0.550984816039631, "grad_norm": 1.5304052829742432, "learning_rate": 4.418725308476473e-06, "loss": 0.7958, "step": 15599 }, { "epoch": 0.5510201378433389, "grad_norm": 1.6375718116760254, "learning_rate": 4.41815718096972e-06, "loss": 0.7636, "step": 15600 }, { "epoch": 0.5510554596470468, "grad_norm": 1.6043583154678345, "learning_rate": 4.417589061078017e-06, "loss": 0.771, "step": 15601 }, { "epoch": 0.5510907814507547, "grad_norm": 1.7007783651351929, "learning_rate": 4.417020948808799e-06, "loss": 0.8178, "step": 15602 }, { "epoch": 0.5511261032544627, "grad_norm": 1.6197795867919922, "learning_rate": 4.416452844169502e-06, "loss": 0.7878, "step": 15603 }, { "epoch": 0.5511614250581706, "grad_norm": 1.7126948833465576, "learning_rate": 4.4158847471675586e-06, "loss": 0.7895, "step": 15604 }, { "epoch": 0.5511967468618785, "grad_norm": 1.6909189224243164, "learning_rate": 4.415316657810407e-06, "loss": 0.8355, "step": 15605 }, { "epoch": 0.5512320686655864, "grad_norm": 1.721756100654602, "learning_rate": 4.414748576105481e-06, "loss": 0.8063, "step": 15606 }, { "epoch": 0.5512673904692943, "grad_norm": 1.6300175189971924, "learning_rate": 4.414180502060215e-06, "loss": 0.784, "step": 15607 }, { "epoch": 0.5513027122730022, "grad_norm": 1.6114246845245361, "learning_rate": 4.413612435682046e-06, "loss": 0.8012, "step": 15608 }, { "epoch": 0.5513380340767101, "grad_norm": 1.6670215129852295, "learning_rate": 4.413044376978405e-06, "loss": 0.7934, "step": 15609 }, { "epoch": 0.551373355880418, "grad_norm": 2.1953680515289307, "learning_rate": 4.412476325956731e-06, "loss": 0.7961, "step": 15610 }, { "epoch": 0.5514086776841259, "grad_norm": 1.6365348100662231, "learning_rate": 4.411908282624455e-06, "loss": 0.7438, "step": 15611 }, { "epoch": 0.5514439994878338, "grad_norm": 1.6174143552780151, "learning_rate": 4.4113402469890136e-06, "loss": 0.7907, "step": 15612 }, { "epoch": 0.5514793212915418, "grad_norm": 1.6144773960113525, "learning_rate": 4.41077221905784e-06, "loss": 0.7593, "step": 15613 }, { "epoch": 0.5515146430952497, "grad_norm": 1.7010811567306519, "learning_rate": 4.410204198838369e-06, "loss": 0.7895, "step": 15614 }, { "epoch": 0.5515499648989576, "grad_norm": 1.6834344863891602, "learning_rate": 4.409636186338036e-06, "loss": 0.7723, "step": 15615 }, { "epoch": 0.5515852867026655, "grad_norm": 1.7232979536056519, "learning_rate": 4.4090681815642725e-06, "loss": 0.7792, "step": 15616 }, { "epoch": 0.5516206085063734, "grad_norm": 1.6235283613204956, "learning_rate": 4.4085001845245125e-06, "loss": 0.81, "step": 15617 }, { "epoch": 0.5516559303100813, "grad_norm": 0.9592337012290955, "learning_rate": 4.407932195226192e-06, "loss": 0.5684, "step": 15618 }, { "epoch": 0.5516912521137892, "grad_norm": 1.5583146810531616, "learning_rate": 4.407364213676742e-06, "loss": 0.7769, "step": 15619 }, { "epoch": 0.5517265739174971, "grad_norm": 1.6516673564910889, "learning_rate": 4.4067962398836e-06, "loss": 0.8204, "step": 15620 }, { "epoch": 0.551761895721205, "grad_norm": 1.6956361532211304, "learning_rate": 4.406228273854197e-06, "loss": 0.7714, "step": 15621 }, { "epoch": 0.551797217524913, "grad_norm": 2.23409366607666, "learning_rate": 4.405660315595966e-06, "loss": 0.7582, "step": 15622 }, { "epoch": 0.5518325393286209, "grad_norm": 2.342186689376831, "learning_rate": 4.4050923651163405e-06, "loss": 0.7722, "step": 15623 }, { "epoch": 0.5518678611323288, "grad_norm": 1.7872101068496704, "learning_rate": 4.404524422422756e-06, "loss": 0.779, "step": 15624 }, { "epoch": 0.5519031829360366, "grad_norm": 1.6793636083602905, "learning_rate": 4.403956487522641e-06, "loss": 0.8245, "step": 15625 }, { "epoch": 0.5519385047397445, "grad_norm": 1.6943566799163818, "learning_rate": 4.4033885604234335e-06, "loss": 0.7803, "step": 15626 }, { "epoch": 0.5519738265434524, "grad_norm": 1.7693450450897217, "learning_rate": 4.402820641132565e-06, "loss": 0.7943, "step": 15627 }, { "epoch": 0.5520091483471603, "grad_norm": 1.7650978565216064, "learning_rate": 4.402252729657466e-06, "loss": 0.7786, "step": 15628 }, { "epoch": 0.5520444701508682, "grad_norm": 1.9392176866531372, "learning_rate": 4.401684826005572e-06, "loss": 0.8204, "step": 15629 }, { "epoch": 0.5520797919545761, "grad_norm": 1.5890275239944458, "learning_rate": 4.401116930184315e-06, "loss": 0.7687, "step": 15630 }, { "epoch": 0.552115113758284, "grad_norm": 1.5573471784591675, "learning_rate": 4.400549042201126e-06, "loss": 0.8109, "step": 15631 }, { "epoch": 0.552150435561992, "grad_norm": 2.5936408042907715, "learning_rate": 4.399981162063439e-06, "loss": 0.7691, "step": 15632 }, { "epoch": 0.5521857573656999, "grad_norm": 1.6709480285644531, "learning_rate": 4.399413289778684e-06, "loss": 0.8104, "step": 15633 }, { "epoch": 0.5522210791694078, "grad_norm": 1.7081701755523682, "learning_rate": 4.3988454253542995e-06, "loss": 0.7857, "step": 15634 }, { "epoch": 0.5522564009731157, "grad_norm": 1.7757197618484497, "learning_rate": 4.398277568797709e-06, "loss": 0.7919, "step": 15635 }, { "epoch": 0.5522917227768236, "grad_norm": 1.5410637855529785, "learning_rate": 4.39770972011635e-06, "loss": 0.8023, "step": 15636 }, { "epoch": 0.5523270445805315, "grad_norm": 1.6607433557510376, "learning_rate": 4.397141879317653e-06, "loss": 0.8247, "step": 15637 }, { "epoch": 0.5523623663842394, "grad_norm": 1.5427591800689697, "learning_rate": 4.396574046409049e-06, "loss": 0.7599, "step": 15638 }, { "epoch": 0.5523976881879473, "grad_norm": 1.7266767024993896, "learning_rate": 4.396006221397971e-06, "loss": 0.7785, "step": 15639 }, { "epoch": 0.5524330099916552, "grad_norm": 1.861840844154358, "learning_rate": 4.3954384042918506e-06, "loss": 0.8008, "step": 15640 }, { "epoch": 0.5524683317953631, "grad_norm": 1.729609489440918, "learning_rate": 4.394870595098117e-06, "loss": 0.8161, "step": 15641 }, { "epoch": 0.552503653599071, "grad_norm": 2.2201287746429443, "learning_rate": 4.3943027938242044e-06, "loss": 0.8107, "step": 15642 }, { "epoch": 0.552538975402779, "grad_norm": 1.8412729501724243, "learning_rate": 4.393735000477543e-06, "loss": 0.8527, "step": 15643 }, { "epoch": 0.5525742972064869, "grad_norm": 1.5488874912261963, "learning_rate": 4.393167215065564e-06, "loss": 0.7828, "step": 15644 }, { "epoch": 0.5526096190101948, "grad_norm": 1.7146016359329224, "learning_rate": 4.3925994375956984e-06, "loss": 0.8166, "step": 15645 }, { "epoch": 0.5526449408139027, "grad_norm": 1.7813411951065063, "learning_rate": 4.392031668075376e-06, "loss": 0.8028, "step": 15646 }, { "epoch": 0.5526802626176106, "grad_norm": 1.7688894271850586, "learning_rate": 4.39146390651203e-06, "loss": 0.7771, "step": 15647 }, { "epoch": 0.5527155844213185, "grad_norm": 1.7841745615005493, "learning_rate": 4.39089615291309e-06, "loss": 0.8065, "step": 15648 }, { "epoch": 0.5527509062250264, "grad_norm": 1.7094173431396484, "learning_rate": 4.390328407285986e-06, "loss": 0.8006, "step": 15649 }, { "epoch": 0.5527862280287343, "grad_norm": 1.723519206047058, "learning_rate": 4.38976066963815e-06, "loss": 0.7688, "step": 15650 }, { "epoch": 0.5528215498324421, "grad_norm": 1.6788958311080933, "learning_rate": 4.389192939977012e-06, "loss": 0.8074, "step": 15651 }, { "epoch": 0.55285687163615, "grad_norm": 1.583409309387207, "learning_rate": 4.388625218310003e-06, "loss": 0.7513, "step": 15652 }, { "epoch": 0.552892193439858, "grad_norm": 1.8013406991958618, "learning_rate": 4.388057504644549e-06, "loss": 0.8095, "step": 15653 }, { "epoch": 0.5529275152435659, "grad_norm": 1.6776056289672852, "learning_rate": 4.3874897989880855e-06, "loss": 0.7871, "step": 15654 }, { "epoch": 0.5529628370472738, "grad_norm": 1.5891162157058716, "learning_rate": 4.38692210134804e-06, "loss": 0.8103, "step": 15655 }, { "epoch": 0.5529981588509817, "grad_norm": 1.9902094602584839, "learning_rate": 4.3863544117318415e-06, "loss": 0.7802, "step": 15656 }, { "epoch": 0.5530334806546896, "grad_norm": 1.7003371715545654, "learning_rate": 4.3857867301469225e-06, "loss": 0.8166, "step": 15657 }, { "epoch": 0.5530688024583975, "grad_norm": 1.6973556280136108, "learning_rate": 4.385219056600711e-06, "loss": 0.7816, "step": 15658 }, { "epoch": 0.5531041242621054, "grad_norm": 1.7579516172409058, "learning_rate": 4.384651391100637e-06, "loss": 0.7904, "step": 15659 }, { "epoch": 0.5531394460658133, "grad_norm": 1.600431203842163, "learning_rate": 4.38408373365413e-06, "loss": 0.7937, "step": 15660 }, { "epoch": 0.5531747678695212, "grad_norm": 1.8608274459838867, "learning_rate": 4.3835160842686195e-06, "loss": 0.8053, "step": 15661 }, { "epoch": 0.5532100896732292, "grad_norm": 1.7505840063095093, "learning_rate": 4.382948442951533e-06, "loss": 0.8238, "step": 15662 }, { "epoch": 0.5532454114769371, "grad_norm": 1.5162898302078247, "learning_rate": 4.382380809710303e-06, "loss": 0.8015, "step": 15663 }, { "epoch": 0.553280733280645, "grad_norm": 1.666499137878418, "learning_rate": 4.381813184552357e-06, "loss": 0.7747, "step": 15664 }, { "epoch": 0.5533160550843529, "grad_norm": 1.7025303840637207, "learning_rate": 4.381245567485122e-06, "loss": 0.8162, "step": 15665 }, { "epoch": 0.5533513768880608, "grad_norm": 1.7689746618270874, "learning_rate": 4.380677958516031e-06, "loss": 0.769, "step": 15666 }, { "epoch": 0.5533866986917687, "grad_norm": 1.6087836027145386, "learning_rate": 4.3801103576525084e-06, "loss": 0.8071, "step": 15667 }, { "epoch": 0.5534220204954766, "grad_norm": 1.7900667190551758, "learning_rate": 4.379542764901986e-06, "loss": 0.7897, "step": 15668 }, { "epoch": 0.5534573422991845, "grad_norm": 2.146890878677368, "learning_rate": 4.378975180271892e-06, "loss": 0.7954, "step": 15669 }, { "epoch": 0.5534926641028924, "grad_norm": 1.6232661008834839, "learning_rate": 4.378407603769652e-06, "loss": 0.7568, "step": 15670 }, { "epoch": 0.5535279859066004, "grad_norm": 1.6625250577926636, "learning_rate": 4.3778400354027e-06, "loss": 0.7859, "step": 15671 }, { "epoch": 0.5535633077103083, "grad_norm": 1.6434420347213745, "learning_rate": 4.377272475178458e-06, "loss": 0.8167, "step": 15672 }, { "epoch": 0.5535986295140162, "grad_norm": 3.593167304992676, "learning_rate": 4.376704923104357e-06, "loss": 0.7989, "step": 15673 }, { "epoch": 0.5536339513177241, "grad_norm": 1.528779149055481, "learning_rate": 4.376137379187825e-06, "loss": 0.7602, "step": 15674 }, { "epoch": 0.553669273121432, "grad_norm": 1.6950656175613403, "learning_rate": 4.375569843436289e-06, "loss": 0.7849, "step": 15675 }, { "epoch": 0.5537045949251399, "grad_norm": 1.5856729745864868, "learning_rate": 4.375002315857178e-06, "loss": 0.7845, "step": 15676 }, { "epoch": 0.5537399167288478, "grad_norm": 1.6400327682495117, "learning_rate": 4.374434796457919e-06, "loss": 0.8077, "step": 15677 }, { "epoch": 0.5537752385325556, "grad_norm": 1.615399956703186, "learning_rate": 4.373867285245941e-06, "loss": 0.7867, "step": 15678 }, { "epoch": 0.5538105603362635, "grad_norm": 1.7021253108978271, "learning_rate": 4.37329978222867e-06, "loss": 0.8067, "step": 15679 }, { "epoch": 0.5538458821399714, "grad_norm": 1.6186118125915527, "learning_rate": 4.372732287413532e-06, "loss": 0.7795, "step": 15680 }, { "epoch": 0.5538812039436793, "grad_norm": 1.6161353588104248, "learning_rate": 4.372164800807957e-06, "loss": 0.7621, "step": 15681 }, { "epoch": 0.5539165257473873, "grad_norm": 1.5917999744415283, "learning_rate": 4.371597322419372e-06, "loss": 0.7641, "step": 15682 }, { "epoch": 0.5539518475510952, "grad_norm": 1.5821722745895386, "learning_rate": 4.3710298522552005e-06, "loss": 0.7647, "step": 15683 }, { "epoch": 0.5539871693548031, "grad_norm": 1.5662277936935425, "learning_rate": 4.370462390322875e-06, "loss": 0.7806, "step": 15684 }, { "epoch": 0.554022491158511, "grad_norm": 1.697686791419983, "learning_rate": 4.369894936629818e-06, "loss": 0.7706, "step": 15685 }, { "epoch": 0.5540578129622189, "grad_norm": 1.540346622467041, "learning_rate": 4.369327491183457e-06, "loss": 0.7881, "step": 15686 }, { "epoch": 0.5540931347659268, "grad_norm": 1.631349802017212, "learning_rate": 4.36876005399122e-06, "loss": 0.7801, "step": 15687 }, { "epoch": 0.5541284565696347, "grad_norm": 1.8566912412643433, "learning_rate": 4.368192625060533e-06, "loss": 0.8004, "step": 15688 }, { "epoch": 0.5541637783733426, "grad_norm": 1.7707080841064453, "learning_rate": 4.367625204398824e-06, "loss": 0.8315, "step": 15689 }, { "epoch": 0.5541991001770505, "grad_norm": 1.6352040767669678, "learning_rate": 4.367057792013515e-06, "loss": 0.8457, "step": 15690 }, { "epoch": 0.5542344219807585, "grad_norm": 1.7774386405944824, "learning_rate": 4.366490387912035e-06, "loss": 0.7945, "step": 15691 }, { "epoch": 0.5542697437844664, "grad_norm": 1.5922951698303223, "learning_rate": 4.3659229921018095e-06, "loss": 0.8213, "step": 15692 }, { "epoch": 0.5543050655881743, "grad_norm": 1.6692776679992676, "learning_rate": 4.365355604590264e-06, "loss": 0.7781, "step": 15693 }, { "epoch": 0.5543403873918822, "grad_norm": 1.6878974437713623, "learning_rate": 4.364788225384826e-06, "loss": 0.7749, "step": 15694 }, { "epoch": 0.5543757091955901, "grad_norm": 1.5762330293655396, "learning_rate": 4.364220854492921e-06, "loss": 0.7755, "step": 15695 }, { "epoch": 0.554411030999298, "grad_norm": 1.7016735076904297, "learning_rate": 4.363653491921973e-06, "loss": 0.7739, "step": 15696 }, { "epoch": 0.5544463528030059, "grad_norm": 1.5907875299453735, "learning_rate": 4.363086137679408e-06, "loss": 0.7633, "step": 15697 }, { "epoch": 0.5544816746067138, "grad_norm": 2.173708915710449, "learning_rate": 4.362518791772653e-06, "loss": 0.7875, "step": 15698 }, { "epoch": 0.5545169964104217, "grad_norm": 2.095823049545288, "learning_rate": 4.361951454209131e-06, "loss": 0.8118, "step": 15699 }, { "epoch": 0.5545523182141296, "grad_norm": 1.825365662574768, "learning_rate": 4.36138412499627e-06, "loss": 0.8216, "step": 15700 }, { "epoch": 0.5545876400178376, "grad_norm": 1.6589981317520142, "learning_rate": 4.360816804141492e-06, "loss": 0.7793, "step": 15701 }, { "epoch": 0.5546229618215455, "grad_norm": 1.8905682563781738, "learning_rate": 4.360249491652224e-06, "loss": 0.8124, "step": 15702 }, { "epoch": 0.5546582836252534, "grad_norm": 2.3857319355010986, "learning_rate": 4.359682187535891e-06, "loss": 0.789, "step": 15703 }, { "epoch": 0.5546936054289612, "grad_norm": 2.837034225463867, "learning_rate": 4.359114891799916e-06, "loss": 0.7966, "step": 15704 }, { "epoch": 0.5547289272326691, "grad_norm": 1.7420482635498047, "learning_rate": 4.358547604451726e-06, "loss": 0.7618, "step": 15705 }, { "epoch": 0.554764249036377, "grad_norm": 1.735782265663147, "learning_rate": 4.357980325498745e-06, "loss": 0.785, "step": 15706 }, { "epoch": 0.5547995708400849, "grad_norm": 1.637288212776184, "learning_rate": 4.357413054948394e-06, "loss": 0.7977, "step": 15707 }, { "epoch": 0.5548348926437928, "grad_norm": 1.7561030387878418, "learning_rate": 4.356845792808105e-06, "loss": 0.7931, "step": 15708 }, { "epoch": 0.5548702144475007, "grad_norm": 1.725632667541504, "learning_rate": 4.356278539085293e-06, "loss": 0.7738, "step": 15709 }, { "epoch": 0.5549055362512086, "grad_norm": 1.6393572092056274, "learning_rate": 4.355711293787389e-06, "loss": 0.8233, "step": 15710 }, { "epoch": 0.5549408580549166, "grad_norm": 1.6671963930130005, "learning_rate": 4.355144056921812e-06, "loss": 0.7811, "step": 15711 }, { "epoch": 0.5549761798586245, "grad_norm": 1.5553609132766724, "learning_rate": 4.35457682849599e-06, "loss": 0.7762, "step": 15712 }, { "epoch": 0.5550115016623324, "grad_norm": 2.6319305896759033, "learning_rate": 4.354009608517345e-06, "loss": 0.7699, "step": 15713 }, { "epoch": 0.5550468234660403, "grad_norm": 1.536318063735962, "learning_rate": 4.3534423969933e-06, "loss": 0.7749, "step": 15714 }, { "epoch": 0.5550821452697482, "grad_norm": 1.649847149848938, "learning_rate": 4.35287519393128e-06, "loss": 0.7996, "step": 15715 }, { "epoch": 0.5551174670734561, "grad_norm": 1.6022340059280396, "learning_rate": 4.352307999338709e-06, "loss": 0.8081, "step": 15716 }, { "epoch": 0.555152788877164, "grad_norm": 1.0105940103530884, "learning_rate": 4.351740813223007e-06, "loss": 0.5823, "step": 15717 }, { "epoch": 0.5551881106808719, "grad_norm": 2.2410402297973633, "learning_rate": 4.3511736355916e-06, "loss": 0.7386, "step": 15718 }, { "epoch": 0.5552234324845798, "grad_norm": 1.6356723308563232, "learning_rate": 4.3506064664519106e-06, "loss": 0.8069, "step": 15719 }, { "epoch": 0.5552587542882877, "grad_norm": 1.679160714149475, "learning_rate": 4.3500393058113615e-06, "loss": 0.7455, "step": 15720 }, { "epoch": 0.5552940760919957, "grad_norm": 1.6538728475570679, "learning_rate": 4.349472153677376e-06, "loss": 0.8028, "step": 15721 }, { "epoch": 0.5553293978957036, "grad_norm": 1.8608282804489136, "learning_rate": 4.348905010057377e-06, "loss": 0.8289, "step": 15722 }, { "epoch": 0.5553647196994115, "grad_norm": 1.7004873752593994, "learning_rate": 4.3483378749587866e-06, "loss": 0.8152, "step": 15723 }, { "epoch": 0.5554000415031194, "grad_norm": 2.069593667984009, "learning_rate": 4.347770748389028e-06, "loss": 0.7873, "step": 15724 }, { "epoch": 0.5554353633068273, "grad_norm": 1.8999061584472656, "learning_rate": 4.347203630355522e-06, "loss": 0.7812, "step": 15725 }, { "epoch": 0.5554706851105352, "grad_norm": 1.7385197877883911, "learning_rate": 4.346636520865696e-06, "loss": 0.7909, "step": 15726 }, { "epoch": 0.5555060069142431, "grad_norm": 1.808797001838684, "learning_rate": 4.346069419926964e-06, "loss": 0.7721, "step": 15727 }, { "epoch": 0.555541328717951, "grad_norm": 1.600582242012024, "learning_rate": 4.3455023275467545e-06, "loss": 0.7762, "step": 15728 }, { "epoch": 0.5555766505216589, "grad_norm": 1.6920701265335083, "learning_rate": 4.3449352437324876e-06, "loss": 0.7752, "step": 15729 }, { "epoch": 0.5556119723253667, "grad_norm": 1.6718367338180542, "learning_rate": 4.344368168491584e-06, "loss": 0.7933, "step": 15730 }, { "epoch": 0.5556472941290747, "grad_norm": 1.728821039199829, "learning_rate": 4.3438011018314665e-06, "loss": 0.7848, "step": 15731 }, { "epoch": 0.5556826159327826, "grad_norm": 0.8771234750747681, "learning_rate": 4.343234043759558e-06, "loss": 0.5561, "step": 15732 }, { "epoch": 0.5557179377364905, "grad_norm": 1.5730482339859009, "learning_rate": 4.342666994283278e-06, "loss": 0.7892, "step": 15733 }, { "epoch": 0.5557532595401984, "grad_norm": 1.6671783924102783, "learning_rate": 4.342099953410049e-06, "loss": 0.785, "step": 15734 }, { "epoch": 0.5557885813439063, "grad_norm": 1.606090784072876, "learning_rate": 4.3415329211472916e-06, "loss": 0.7943, "step": 15735 }, { "epoch": 0.5558239031476142, "grad_norm": 1.7819530963897705, "learning_rate": 4.3409658975024285e-06, "loss": 0.7942, "step": 15736 }, { "epoch": 0.5558592249513221, "grad_norm": 1.6142051219940186, "learning_rate": 4.34039888248288e-06, "loss": 0.7615, "step": 15737 }, { "epoch": 0.55589454675503, "grad_norm": 1.6995933055877686, "learning_rate": 4.339831876096066e-06, "loss": 0.8228, "step": 15738 }, { "epoch": 0.5559298685587379, "grad_norm": 2.0953125953674316, "learning_rate": 4.339264878349409e-06, "loss": 0.8068, "step": 15739 }, { "epoch": 0.5559651903624458, "grad_norm": 1.8316293954849243, "learning_rate": 4.338697889250329e-06, "loss": 0.7879, "step": 15740 }, { "epoch": 0.5560005121661538, "grad_norm": 1.6391969919204712, "learning_rate": 4.338130908806245e-06, "loss": 0.7848, "step": 15741 }, { "epoch": 0.5560358339698617, "grad_norm": 1.559644103050232, "learning_rate": 4.3375639370245815e-06, "loss": 0.7795, "step": 15742 }, { "epoch": 0.5560711557735696, "grad_norm": 1.783147931098938, "learning_rate": 4.336996973912756e-06, "loss": 0.7897, "step": 15743 }, { "epoch": 0.5561064775772775, "grad_norm": 1.8642257452011108, "learning_rate": 4.336430019478191e-06, "loss": 0.7908, "step": 15744 }, { "epoch": 0.5561417993809854, "grad_norm": 1.8215126991271973, "learning_rate": 4.335863073728302e-06, "loss": 0.7871, "step": 15745 }, { "epoch": 0.5561771211846933, "grad_norm": 1.6972663402557373, "learning_rate": 4.335296136670515e-06, "loss": 0.7659, "step": 15746 }, { "epoch": 0.5562124429884012, "grad_norm": 1.6233290433883667, "learning_rate": 4.334729208312246e-06, "loss": 0.7988, "step": 15747 }, { "epoch": 0.5562477647921091, "grad_norm": 1.6967869997024536, "learning_rate": 4.334162288660916e-06, "loss": 0.8141, "step": 15748 }, { "epoch": 0.556283086595817, "grad_norm": 1.629746675491333, "learning_rate": 4.333595377723945e-06, "loss": 0.7964, "step": 15749 }, { "epoch": 0.556318408399525, "grad_norm": 1.8448703289031982, "learning_rate": 4.3330284755087525e-06, "loss": 0.7951, "step": 15750 }, { "epoch": 0.5563537302032329, "grad_norm": 1.931294322013855, "learning_rate": 4.332461582022757e-06, "loss": 0.7649, "step": 15751 }, { "epoch": 0.5563890520069408, "grad_norm": 1.7381939888000488, "learning_rate": 4.33189469727338e-06, "loss": 0.7919, "step": 15752 }, { "epoch": 0.5564243738106487, "grad_norm": 1.6317164897918701, "learning_rate": 4.33132782126804e-06, "loss": 0.7989, "step": 15753 }, { "epoch": 0.5564596956143566, "grad_norm": 1.8257173299789429, "learning_rate": 4.330760954014154e-06, "loss": 0.8212, "step": 15754 }, { "epoch": 0.5564950174180645, "grad_norm": 1.4956597089767456, "learning_rate": 4.330194095519144e-06, "loss": 0.7727, "step": 15755 }, { "epoch": 0.5565303392217723, "grad_norm": 1.8167227506637573, "learning_rate": 4.329627245790428e-06, "loss": 0.7857, "step": 15756 }, { "epoch": 0.5565656610254802, "grad_norm": 1.9080982208251953, "learning_rate": 4.329060404835423e-06, "loss": 0.7704, "step": 15757 }, { "epoch": 0.5566009828291881, "grad_norm": 1.5885443687438965, "learning_rate": 4.328493572661551e-06, "loss": 0.8057, "step": 15758 }, { "epoch": 0.556636304632896, "grad_norm": 1.7487059831619263, "learning_rate": 4.327926749276228e-06, "loss": 0.8037, "step": 15759 }, { "epoch": 0.556671626436604, "grad_norm": 1.791873812675476, "learning_rate": 4.327359934686873e-06, "loss": 0.7965, "step": 15760 }, { "epoch": 0.5567069482403119, "grad_norm": 1.7036031484603882, "learning_rate": 4.326793128900906e-06, "loss": 0.7869, "step": 15761 }, { "epoch": 0.5567422700440198, "grad_norm": 1.6853106021881104, "learning_rate": 4.326226331925743e-06, "loss": 0.7905, "step": 15762 }, { "epoch": 0.5567775918477277, "grad_norm": 0.9770750403404236, "learning_rate": 4.325659543768806e-06, "loss": 0.5794, "step": 15763 }, { "epoch": 0.5568129136514356, "grad_norm": 1.8636550903320312, "learning_rate": 4.325092764437507e-06, "loss": 0.7758, "step": 15764 }, { "epoch": 0.5568482354551435, "grad_norm": 1.7105679512023926, "learning_rate": 4.3245259939392685e-06, "loss": 0.7627, "step": 15765 }, { "epoch": 0.5568835572588514, "grad_norm": 1.9587938785552979, "learning_rate": 4.323959232281506e-06, "loss": 0.7847, "step": 15766 }, { "epoch": 0.5569188790625593, "grad_norm": 1.649409532546997, "learning_rate": 4.323392479471637e-06, "loss": 0.8067, "step": 15767 }, { "epoch": 0.5569542008662672, "grad_norm": 1.6797741651535034, "learning_rate": 4.322825735517081e-06, "loss": 0.7625, "step": 15768 }, { "epoch": 0.5569895226699751, "grad_norm": 1.6848405599594116, "learning_rate": 4.322259000425254e-06, "loss": 0.7906, "step": 15769 }, { "epoch": 0.5570248444736831, "grad_norm": 2.063028573989868, "learning_rate": 4.321692274203575e-06, "loss": 0.7585, "step": 15770 }, { "epoch": 0.557060166277391, "grad_norm": 1.8703546524047852, "learning_rate": 4.32112555685946e-06, "loss": 0.8118, "step": 15771 }, { "epoch": 0.5570954880810989, "grad_norm": 1.8106422424316406, "learning_rate": 4.320558848400324e-06, "loss": 0.7585, "step": 15772 }, { "epoch": 0.5571308098848068, "grad_norm": 1.9414383172988892, "learning_rate": 4.319992148833588e-06, "loss": 0.7708, "step": 15773 }, { "epoch": 0.5571661316885147, "grad_norm": 1.7132399082183838, "learning_rate": 4.319425458166667e-06, "loss": 0.7774, "step": 15774 }, { "epoch": 0.5572014534922226, "grad_norm": 1.7028812170028687, "learning_rate": 4.318858776406976e-06, "loss": 0.8034, "step": 15775 }, { "epoch": 0.5572367752959305, "grad_norm": 1.7363027334213257, "learning_rate": 4.318292103561935e-06, "loss": 0.8035, "step": 15776 }, { "epoch": 0.5572720970996384, "grad_norm": 1.6343231201171875, "learning_rate": 4.317725439638958e-06, "loss": 0.7774, "step": 15777 }, { "epoch": 0.5573074189033463, "grad_norm": 1.7834010124206543, "learning_rate": 4.317158784645462e-06, "loss": 0.7708, "step": 15778 }, { "epoch": 0.5573427407070543, "grad_norm": 1.8676128387451172, "learning_rate": 4.3165921385888645e-06, "loss": 0.7603, "step": 15779 }, { "epoch": 0.5573780625107622, "grad_norm": 1.6541852951049805, "learning_rate": 4.3160255014765805e-06, "loss": 0.7683, "step": 15780 }, { "epoch": 0.5574133843144701, "grad_norm": 1.7790348529815674, "learning_rate": 4.315458873316026e-06, "loss": 0.8109, "step": 15781 }, { "epoch": 0.5574487061181779, "grad_norm": 1.729123592376709, "learning_rate": 4.314892254114616e-06, "loss": 0.767, "step": 15782 }, { "epoch": 0.5574840279218858, "grad_norm": 1.7633265256881714, "learning_rate": 4.3143256438797685e-06, "loss": 0.7507, "step": 15783 }, { "epoch": 0.5575193497255937, "grad_norm": 1.592778205871582, "learning_rate": 4.3137590426188986e-06, "loss": 0.7858, "step": 15784 }, { "epoch": 0.5575546715293016, "grad_norm": 1.6987614631652832, "learning_rate": 4.31319245033942e-06, "loss": 0.8175, "step": 15785 }, { "epoch": 0.5575899933330095, "grad_norm": 1.817610263824463, "learning_rate": 4.31262586704875e-06, "loss": 0.8464, "step": 15786 }, { "epoch": 0.5576253151367174, "grad_norm": 1.6688445806503296, "learning_rate": 4.312059292754304e-06, "loss": 0.7859, "step": 15787 }, { "epoch": 0.5576606369404253, "grad_norm": 1.8438801765441895, "learning_rate": 4.311492727463495e-06, "loss": 0.7753, "step": 15788 }, { "epoch": 0.5576959587441332, "grad_norm": 1.783340573310852, "learning_rate": 4.310926171183741e-06, "loss": 0.7787, "step": 15789 }, { "epoch": 0.5577312805478412, "grad_norm": 1.6647372245788574, "learning_rate": 4.310359623922456e-06, "loss": 0.7672, "step": 15790 }, { "epoch": 0.5577666023515491, "grad_norm": 1.7188620567321777, "learning_rate": 4.309793085687054e-06, "loss": 0.7798, "step": 15791 }, { "epoch": 0.557801924155257, "grad_norm": 1.6289970874786377, "learning_rate": 4.3092265564849505e-06, "loss": 0.8155, "step": 15792 }, { "epoch": 0.5578372459589649, "grad_norm": 2.0326430797576904, "learning_rate": 4.308660036323558e-06, "loss": 0.7665, "step": 15793 }, { "epoch": 0.5578725677626728, "grad_norm": 1.8938510417938232, "learning_rate": 4.308093525210296e-06, "loss": 0.8116, "step": 15794 }, { "epoch": 0.5579078895663807, "grad_norm": 1.6344603300094604, "learning_rate": 4.307527023152575e-06, "loss": 0.7973, "step": 15795 }, { "epoch": 0.5579432113700886, "grad_norm": 1.735575556755066, "learning_rate": 4.306960530157809e-06, "loss": 0.8066, "step": 15796 }, { "epoch": 0.5579785331737965, "grad_norm": 1.809180498123169, "learning_rate": 4.306394046233415e-06, "loss": 0.7822, "step": 15797 }, { "epoch": 0.5580138549775044, "grad_norm": 2.68911075592041, "learning_rate": 4.305827571386805e-06, "loss": 0.7713, "step": 15798 }, { "epoch": 0.5580491767812124, "grad_norm": 1.7065478563308716, "learning_rate": 4.305261105625392e-06, "loss": 0.7887, "step": 15799 }, { "epoch": 0.5580844985849203, "grad_norm": 1.5224194526672363, "learning_rate": 4.304694648956595e-06, "loss": 0.7825, "step": 15800 }, { "epoch": 0.5581198203886282, "grad_norm": 1.6479915380477905, "learning_rate": 4.304128201387819e-06, "loss": 0.8176, "step": 15801 }, { "epoch": 0.5581551421923361, "grad_norm": 1.8423147201538086, "learning_rate": 4.303561762926484e-06, "loss": 0.7801, "step": 15802 }, { "epoch": 0.558190463996044, "grad_norm": 1.9621498584747314, "learning_rate": 4.302995333580002e-06, "loss": 0.8214, "step": 15803 }, { "epoch": 0.5582257857997519, "grad_norm": 1.9681075811386108, "learning_rate": 4.302428913355786e-06, "loss": 0.8105, "step": 15804 }, { "epoch": 0.5582611076034598, "grad_norm": 1.7446473836898804, "learning_rate": 4.3018625022612495e-06, "loss": 0.7882, "step": 15805 }, { "epoch": 0.5582964294071677, "grad_norm": 1.8665168285369873, "learning_rate": 4.301296100303804e-06, "loss": 0.8243, "step": 15806 }, { "epoch": 0.5583317512108756, "grad_norm": 1.6598562002182007, "learning_rate": 4.3007297074908645e-06, "loss": 0.7715, "step": 15807 }, { "epoch": 0.5583670730145834, "grad_norm": 1.789993166923523, "learning_rate": 4.300163323829844e-06, "loss": 0.7459, "step": 15808 }, { "epoch": 0.5584023948182913, "grad_norm": 1.7372289896011353, "learning_rate": 4.299596949328153e-06, "loss": 0.768, "step": 15809 }, { "epoch": 0.5584377166219993, "grad_norm": 1.6681983470916748, "learning_rate": 4.299030583993206e-06, "loss": 0.772, "step": 15810 }, { "epoch": 0.5584730384257072, "grad_norm": 1.6027436256408691, "learning_rate": 4.298464227832416e-06, "loss": 0.8016, "step": 15811 }, { "epoch": 0.5585083602294151, "grad_norm": 1.7768359184265137, "learning_rate": 4.2978978808531925e-06, "loss": 0.7581, "step": 15812 }, { "epoch": 0.558543682033123, "grad_norm": 2.015009880065918, "learning_rate": 4.29733154306295e-06, "loss": 0.8019, "step": 15813 }, { "epoch": 0.5585790038368309, "grad_norm": 1.759753942489624, "learning_rate": 4.2967652144691016e-06, "loss": 0.8241, "step": 15814 }, { "epoch": 0.5586143256405388, "grad_norm": 1.8334771394729614, "learning_rate": 4.2961988950790565e-06, "loss": 0.7913, "step": 15815 }, { "epoch": 0.5586496474442467, "grad_norm": 1.6460288763046265, "learning_rate": 4.295632584900229e-06, "loss": 0.7778, "step": 15816 }, { "epoch": 0.5586849692479546, "grad_norm": 1.7038012742996216, "learning_rate": 4.295066283940027e-06, "loss": 0.7951, "step": 15817 }, { "epoch": 0.5587202910516625, "grad_norm": 0.9314693212509155, "learning_rate": 4.294499992205871e-06, "loss": 0.5966, "step": 15818 }, { "epoch": 0.5587556128553705, "grad_norm": 1.6549497842788696, "learning_rate": 4.293933709705161e-06, "loss": 0.8202, "step": 15819 }, { "epoch": 0.5587909346590784, "grad_norm": 1.714133620262146, "learning_rate": 4.293367436445316e-06, "loss": 0.7857, "step": 15820 }, { "epoch": 0.5588262564627863, "grad_norm": 1.6683040857315063, "learning_rate": 4.292801172433745e-06, "loss": 0.8045, "step": 15821 }, { "epoch": 0.5588615782664942, "grad_norm": 1.9905236959457397, "learning_rate": 4.292234917677858e-06, "loss": 0.8108, "step": 15822 }, { "epoch": 0.5588969000702021, "grad_norm": 1.9898558855056763, "learning_rate": 4.291668672185069e-06, "loss": 0.766, "step": 15823 }, { "epoch": 0.55893222187391, "grad_norm": 1.9686051607131958, "learning_rate": 4.291102435962787e-06, "loss": 0.8009, "step": 15824 }, { "epoch": 0.5589675436776179, "grad_norm": 1.7295407056808472, "learning_rate": 4.2905362090184215e-06, "loss": 0.7557, "step": 15825 }, { "epoch": 0.5590028654813258, "grad_norm": 1.8559415340423584, "learning_rate": 4.2899699913593864e-06, "loss": 0.8102, "step": 15826 }, { "epoch": 0.5590381872850337, "grad_norm": 1.7438151836395264, "learning_rate": 4.289403782993089e-06, "loss": 0.7438, "step": 15827 }, { "epoch": 0.5590735090887416, "grad_norm": 1.696135401725769, "learning_rate": 4.288837583926943e-06, "loss": 0.7738, "step": 15828 }, { "epoch": 0.5591088308924496, "grad_norm": 1.7359980344772339, "learning_rate": 4.2882713941683566e-06, "loss": 0.7957, "step": 15829 }, { "epoch": 0.5591441526961575, "grad_norm": 1.6348949670791626, "learning_rate": 4.287705213724739e-06, "loss": 0.7685, "step": 15830 }, { "epoch": 0.5591794744998654, "grad_norm": 1.718040943145752, "learning_rate": 4.287139042603503e-06, "loss": 0.794, "step": 15831 }, { "epoch": 0.5592147963035733, "grad_norm": 1.766709804534912, "learning_rate": 4.286572880812058e-06, "loss": 0.8016, "step": 15832 }, { "epoch": 0.5592501181072812, "grad_norm": 1.79681396484375, "learning_rate": 4.286006728357811e-06, "loss": 0.7624, "step": 15833 }, { "epoch": 0.559285439910989, "grad_norm": 1.7524605989456177, "learning_rate": 4.285440585248175e-06, "loss": 0.8141, "step": 15834 }, { "epoch": 0.5593207617146969, "grad_norm": 1.9046937227249146, "learning_rate": 4.284874451490559e-06, "loss": 0.7756, "step": 15835 }, { "epoch": 0.5593560835184048, "grad_norm": 1.9354102611541748, "learning_rate": 4.28430832709237e-06, "loss": 0.7879, "step": 15836 }, { "epoch": 0.5593914053221127, "grad_norm": 0.9322109222412109, "learning_rate": 4.2837422120610215e-06, "loss": 0.5566, "step": 15837 }, { "epoch": 0.5594267271258206, "grad_norm": 1.905678629875183, "learning_rate": 4.283176106403919e-06, "loss": 0.8161, "step": 15838 }, { "epoch": 0.5594620489295286, "grad_norm": 4.201472282409668, "learning_rate": 4.282610010128473e-06, "loss": 0.8198, "step": 15839 }, { "epoch": 0.5594973707332365, "grad_norm": 1.6351487636566162, "learning_rate": 4.2820439232420905e-06, "loss": 0.7764, "step": 15840 }, { "epoch": 0.5595326925369444, "grad_norm": 1.7030938863754272, "learning_rate": 4.281477845752184e-06, "loss": 0.7547, "step": 15841 }, { "epoch": 0.5595680143406523, "grad_norm": 1.5664093494415283, "learning_rate": 4.280911777666161e-06, "loss": 0.7665, "step": 15842 }, { "epoch": 0.5596033361443602, "grad_norm": 1.9055429697036743, "learning_rate": 4.280345718991428e-06, "loss": 0.7898, "step": 15843 }, { "epoch": 0.5596386579480681, "grad_norm": 1.7224839925765991, "learning_rate": 4.279779669735396e-06, "loss": 0.8014, "step": 15844 }, { "epoch": 0.559673979751776, "grad_norm": 1.6256104707717896, "learning_rate": 4.279213629905471e-06, "loss": 0.7942, "step": 15845 }, { "epoch": 0.5597093015554839, "grad_norm": 1.769781470298767, "learning_rate": 4.278647599509063e-06, "loss": 0.7948, "step": 15846 }, { "epoch": 0.5597446233591918, "grad_norm": 1.6756561994552612, "learning_rate": 4.27808157855358e-06, "loss": 0.8215, "step": 15847 }, { "epoch": 0.5597799451628998, "grad_norm": 1.70686936378479, "learning_rate": 4.27751556704643e-06, "loss": 0.7867, "step": 15848 }, { "epoch": 0.5598152669666077, "grad_norm": 3.046924591064453, "learning_rate": 4.276949564995018e-06, "loss": 0.7832, "step": 15849 }, { "epoch": 0.5598505887703156, "grad_norm": 1.6059577465057373, "learning_rate": 4.2763835724067575e-06, "loss": 0.7578, "step": 15850 }, { "epoch": 0.5598859105740235, "grad_norm": 1.6692620515823364, "learning_rate": 4.27581758928905e-06, "loss": 0.7641, "step": 15851 }, { "epoch": 0.5599212323777314, "grad_norm": 1.7012510299682617, "learning_rate": 4.275251615649308e-06, "loss": 0.796, "step": 15852 }, { "epoch": 0.5599565541814393, "grad_norm": 1.8062385320663452, "learning_rate": 4.274685651494936e-06, "loss": 0.7742, "step": 15853 }, { "epoch": 0.5599918759851472, "grad_norm": 1.9009658098220825, "learning_rate": 4.274119696833341e-06, "loss": 0.8028, "step": 15854 }, { "epoch": 0.5600271977888551, "grad_norm": 1.7585461139678955, "learning_rate": 4.273553751671934e-06, "loss": 0.8299, "step": 15855 }, { "epoch": 0.560062519592563, "grad_norm": 1.8200048208236694, "learning_rate": 4.272987816018116e-06, "loss": 0.7537, "step": 15856 }, { "epoch": 0.560097841396271, "grad_norm": 1.6944682598114014, "learning_rate": 4.2724218898792965e-06, "loss": 0.8159, "step": 15857 }, { "epoch": 0.5601331631999789, "grad_norm": 2.0138087272644043, "learning_rate": 4.271855973262884e-06, "loss": 0.8719, "step": 15858 }, { "epoch": 0.5601684850036868, "grad_norm": 1.657871127128601, "learning_rate": 4.271290066176282e-06, "loss": 0.7834, "step": 15859 }, { "epoch": 0.5602038068073946, "grad_norm": 1.7955615520477295, "learning_rate": 4.2707241686269e-06, "loss": 0.7945, "step": 15860 }, { "epoch": 0.5602391286111025, "grad_norm": 1.6313159465789795, "learning_rate": 4.270158280622142e-06, "loss": 0.7695, "step": 15861 }, { "epoch": 0.5602744504148104, "grad_norm": 1.6810863018035889, "learning_rate": 4.269592402169417e-06, "loss": 0.7363, "step": 15862 }, { "epoch": 0.5603097722185183, "grad_norm": 1.740273118019104, "learning_rate": 4.269026533276127e-06, "loss": 0.7883, "step": 15863 }, { "epoch": 0.5603450940222262, "grad_norm": 1.6977167129516602, "learning_rate": 4.268460673949681e-06, "loss": 0.809, "step": 15864 }, { "epoch": 0.5603804158259341, "grad_norm": 1.667933464050293, "learning_rate": 4.267894824197486e-06, "loss": 0.7714, "step": 15865 }, { "epoch": 0.560415737629642, "grad_norm": 1.6862658262252808, "learning_rate": 4.267328984026944e-06, "loss": 0.7762, "step": 15866 }, { "epoch": 0.5604510594333499, "grad_norm": 1.7884091138839722, "learning_rate": 4.266763153445462e-06, "loss": 0.7688, "step": 15867 }, { "epoch": 0.5604863812370579, "grad_norm": 2.6924235820770264, "learning_rate": 4.266197332460448e-06, "loss": 0.7885, "step": 15868 }, { "epoch": 0.5605217030407658, "grad_norm": 1.7193175554275513, "learning_rate": 4.265631521079305e-06, "loss": 0.7926, "step": 15869 }, { "epoch": 0.5605570248444737, "grad_norm": 3.030371904373169, "learning_rate": 4.265065719309437e-06, "loss": 0.7857, "step": 15870 }, { "epoch": 0.5605923466481816, "grad_norm": 1.5979094505310059, "learning_rate": 4.264499927158252e-06, "loss": 0.7732, "step": 15871 }, { "epoch": 0.5606276684518895, "grad_norm": 1.604235053062439, "learning_rate": 4.263934144633154e-06, "loss": 0.7995, "step": 15872 }, { "epoch": 0.5606629902555974, "grad_norm": 1.6457760334014893, "learning_rate": 4.263368371741546e-06, "loss": 0.7818, "step": 15873 }, { "epoch": 0.5606983120593053, "grad_norm": 1.625891089439392, "learning_rate": 4.262802608490837e-06, "loss": 0.7712, "step": 15874 }, { "epoch": 0.5607336338630132, "grad_norm": 1.5318785905838013, "learning_rate": 4.262236854888427e-06, "loss": 0.7493, "step": 15875 }, { "epoch": 0.5607689556667211, "grad_norm": 1.7107139825820923, "learning_rate": 4.261671110941723e-06, "loss": 0.7807, "step": 15876 }, { "epoch": 0.560804277470429, "grad_norm": 2.0394527912139893, "learning_rate": 4.261105376658128e-06, "loss": 0.7986, "step": 15877 }, { "epoch": 0.560839599274137, "grad_norm": 1.6201832294464111, "learning_rate": 4.260539652045047e-06, "loss": 0.814, "step": 15878 }, { "epoch": 0.5608749210778449, "grad_norm": 1.5611077547073364, "learning_rate": 4.259973937109885e-06, "loss": 0.7761, "step": 15879 }, { "epoch": 0.5609102428815528, "grad_norm": 1.7395116090774536, "learning_rate": 4.2594082318600435e-06, "loss": 0.8024, "step": 15880 }, { "epoch": 0.5609455646852607, "grad_norm": 1.7681784629821777, "learning_rate": 4.258842536302929e-06, "loss": 0.7664, "step": 15881 }, { "epoch": 0.5609808864889686, "grad_norm": 1.762263298034668, "learning_rate": 4.258276850445944e-06, "loss": 0.8004, "step": 15882 }, { "epoch": 0.5610162082926765, "grad_norm": 1.0263715982437134, "learning_rate": 4.257711174296491e-06, "loss": 0.6296, "step": 15883 }, { "epoch": 0.5610515300963844, "grad_norm": 1.8428065776824951, "learning_rate": 4.257145507861976e-06, "loss": 0.8074, "step": 15884 }, { "epoch": 0.5610868519000923, "grad_norm": 1.7446337938308716, "learning_rate": 4.256579851149799e-06, "loss": 0.792, "step": 15885 }, { "epoch": 0.5611221737038001, "grad_norm": 1.5929334163665771, "learning_rate": 4.256014204167368e-06, "loss": 0.7739, "step": 15886 }, { "epoch": 0.561157495507508, "grad_norm": 1.6116111278533936, "learning_rate": 4.255448566922081e-06, "loss": 0.7724, "step": 15887 }, { "epoch": 0.561192817311216, "grad_norm": 1.6806820631027222, "learning_rate": 4.254882939421344e-06, "loss": 0.8042, "step": 15888 }, { "epoch": 0.5612281391149239, "grad_norm": 0.8816738128662109, "learning_rate": 4.254317321672559e-06, "loss": 0.5523, "step": 15889 }, { "epoch": 0.5612634609186318, "grad_norm": 2.021980047225952, "learning_rate": 4.253751713683129e-06, "loss": 0.8015, "step": 15890 }, { "epoch": 0.5612987827223397, "grad_norm": 1.9496570825576782, "learning_rate": 4.2531861154604555e-06, "loss": 0.8039, "step": 15891 }, { "epoch": 0.5613341045260476, "grad_norm": 1.8438310623168945, "learning_rate": 4.252620527011945e-06, "loss": 0.8156, "step": 15892 }, { "epoch": 0.5613694263297555, "grad_norm": 1.8758156299591064, "learning_rate": 4.252054948344992e-06, "loss": 0.8546, "step": 15893 }, { "epoch": 0.5614047481334634, "grad_norm": 1.5487817525863647, "learning_rate": 4.251489379467006e-06, "loss": 0.7733, "step": 15894 }, { "epoch": 0.5614400699371713, "grad_norm": 1.6897647380828857, "learning_rate": 4.2509238203853846e-06, "loss": 0.7855, "step": 15895 }, { "epoch": 0.5614753917408792, "grad_norm": 1.843079686164856, "learning_rate": 4.2503582711075325e-06, "loss": 0.8131, "step": 15896 }, { "epoch": 0.5615107135445871, "grad_norm": 1.808786392211914, "learning_rate": 4.24979273164085e-06, "loss": 0.7803, "step": 15897 }, { "epoch": 0.5615460353482951, "grad_norm": 1.7434601783752441, "learning_rate": 4.2492272019927385e-06, "loss": 0.7357, "step": 15898 }, { "epoch": 0.561581357152003, "grad_norm": 1.7353020906448364, "learning_rate": 4.248661682170602e-06, "loss": 0.7991, "step": 15899 }, { "epoch": 0.5616166789557109, "grad_norm": 1.7558434009552002, "learning_rate": 4.2480961721818395e-06, "loss": 0.8009, "step": 15900 }, { "epoch": 0.5616520007594188, "grad_norm": 4.39357328414917, "learning_rate": 4.2475306720338515e-06, "loss": 0.7921, "step": 15901 }, { "epoch": 0.5616873225631267, "grad_norm": 1.7034944295883179, "learning_rate": 4.246965181734042e-06, "loss": 0.804, "step": 15902 }, { "epoch": 0.5617226443668346, "grad_norm": 2.015545129776001, "learning_rate": 4.246399701289811e-06, "loss": 0.7966, "step": 15903 }, { "epoch": 0.5617579661705425, "grad_norm": 2.00307297706604, "learning_rate": 4.245834230708557e-06, "loss": 0.8393, "step": 15904 }, { "epoch": 0.5617932879742504, "grad_norm": 1.8070193529129028, "learning_rate": 4.245268769997686e-06, "loss": 0.7951, "step": 15905 }, { "epoch": 0.5618286097779583, "grad_norm": 1.5890758037567139, "learning_rate": 4.244703319164594e-06, "loss": 0.8038, "step": 15906 }, { "epoch": 0.5618639315816663, "grad_norm": 2.132436752319336, "learning_rate": 4.244137878216681e-06, "loss": 0.8103, "step": 15907 }, { "epoch": 0.5618992533853742, "grad_norm": 1.5776184797286987, "learning_rate": 4.243572447161352e-06, "loss": 0.8024, "step": 15908 }, { "epoch": 0.5619345751890821, "grad_norm": 1.672426462173462, "learning_rate": 4.243007026006003e-06, "loss": 0.7667, "step": 15909 }, { "epoch": 0.56196989699279, "grad_norm": 1.7163474559783936, "learning_rate": 4.242441614758037e-06, "loss": 0.7997, "step": 15910 }, { "epoch": 0.5620052187964979, "grad_norm": 1.6471177339553833, "learning_rate": 4.241876213424854e-06, "loss": 0.8127, "step": 15911 }, { "epoch": 0.5620405406002057, "grad_norm": 1.9802933931350708, "learning_rate": 4.241310822013851e-06, "loss": 0.7716, "step": 15912 }, { "epoch": 0.5620758624039136, "grad_norm": 1.971530795097351, "learning_rate": 4.240745440532429e-06, "loss": 0.8138, "step": 15913 }, { "epoch": 0.5621111842076215, "grad_norm": 1.834572196006775, "learning_rate": 4.2401800689879875e-06, "loss": 0.802, "step": 15914 }, { "epoch": 0.5621465060113294, "grad_norm": 1.7055608034133911, "learning_rate": 4.239614707387928e-06, "loss": 0.7681, "step": 15915 }, { "epoch": 0.5621818278150373, "grad_norm": 1.902361273765564, "learning_rate": 4.239049355739647e-06, "loss": 0.7523, "step": 15916 }, { "epoch": 0.5622171496187452, "grad_norm": 1.6727294921875, "learning_rate": 4.238484014050544e-06, "loss": 0.7874, "step": 15917 }, { "epoch": 0.5622524714224532, "grad_norm": 1.675472378730774, "learning_rate": 4.2379186823280215e-06, "loss": 0.7773, "step": 15918 }, { "epoch": 0.5622877932261611, "grad_norm": 1.9591799974441528, "learning_rate": 4.2373533605794735e-06, "loss": 0.7999, "step": 15919 }, { "epoch": 0.562323115029869, "grad_norm": 1.6823418140411377, "learning_rate": 4.236788048812303e-06, "loss": 0.7818, "step": 15920 }, { "epoch": 0.5623584368335769, "grad_norm": 1.617868423461914, "learning_rate": 4.236222747033907e-06, "loss": 0.7503, "step": 15921 }, { "epoch": 0.5623937586372848, "grad_norm": 1.7530441284179688, "learning_rate": 4.2356574552516834e-06, "loss": 0.7823, "step": 15922 }, { "epoch": 0.5624290804409927, "grad_norm": 2.059701919555664, "learning_rate": 4.235092173473032e-06, "loss": 0.8043, "step": 15923 }, { "epoch": 0.5624644022447006, "grad_norm": 1.7961714267730713, "learning_rate": 4.234526901705351e-06, "loss": 0.7947, "step": 15924 }, { "epoch": 0.5624997240484085, "grad_norm": 1.475734829902649, "learning_rate": 4.233961639956037e-06, "loss": 0.7879, "step": 15925 }, { "epoch": 0.5625350458521164, "grad_norm": 1.843629240989685, "learning_rate": 4.233396388232489e-06, "loss": 0.7999, "step": 15926 }, { "epoch": 0.5625703676558244, "grad_norm": 3.339444160461426, "learning_rate": 4.232831146542107e-06, "loss": 0.7704, "step": 15927 }, { "epoch": 0.5626056894595323, "grad_norm": 1.713318943977356, "learning_rate": 4.232265914892283e-06, "loss": 0.8064, "step": 15928 }, { "epoch": 0.5626410112632402, "grad_norm": 2.1631033420562744, "learning_rate": 4.2317006932904226e-06, "loss": 0.8284, "step": 15929 }, { "epoch": 0.5626763330669481, "grad_norm": 1.7306692600250244, "learning_rate": 4.231135481743917e-06, "loss": 0.7855, "step": 15930 }, { "epoch": 0.562711654870656, "grad_norm": 1.8056902885437012, "learning_rate": 4.2305702802601666e-06, "loss": 0.8075, "step": 15931 }, { "epoch": 0.5627469766743639, "grad_norm": 1.77101469039917, "learning_rate": 4.230005088846565e-06, "loss": 0.7963, "step": 15932 }, { "epoch": 0.5627822984780718, "grad_norm": 1.7750897407531738, "learning_rate": 4.229439907510515e-06, "loss": 0.7723, "step": 15933 }, { "epoch": 0.5628176202817797, "grad_norm": 1.667457103729248, "learning_rate": 4.22887473625941e-06, "loss": 0.8033, "step": 15934 }, { "epoch": 0.5628529420854876, "grad_norm": 1.6807937622070312, "learning_rate": 4.228309575100646e-06, "loss": 0.8157, "step": 15935 }, { "epoch": 0.5628882638891956, "grad_norm": 1.8968770503997803, "learning_rate": 4.227744424041622e-06, "loss": 0.7835, "step": 15936 }, { "epoch": 0.5629235856929035, "grad_norm": 1.6634935140609741, "learning_rate": 4.227179283089735e-06, "loss": 0.8081, "step": 15937 }, { "epoch": 0.5629589074966113, "grad_norm": 1.7203600406646729, "learning_rate": 4.226614152252378e-06, "loss": 0.7718, "step": 15938 }, { "epoch": 0.5629942293003192, "grad_norm": 1.6578983068466187, "learning_rate": 4.2260490315369515e-06, "loss": 0.7484, "step": 15939 }, { "epoch": 0.5630295511040271, "grad_norm": 1.939926266670227, "learning_rate": 4.225483920950849e-06, "loss": 0.7785, "step": 15940 }, { "epoch": 0.563064872907735, "grad_norm": 1.6398265361785889, "learning_rate": 4.2249188205014664e-06, "loss": 0.7811, "step": 15941 }, { "epoch": 0.5631001947114429, "grad_norm": 1.6715168952941895, "learning_rate": 4.224353730196203e-06, "loss": 0.7695, "step": 15942 }, { "epoch": 0.5631355165151508, "grad_norm": 1.7871248722076416, "learning_rate": 4.223788650042449e-06, "loss": 0.7966, "step": 15943 }, { "epoch": 0.5631708383188587, "grad_norm": 0.9096289873123169, "learning_rate": 4.223223580047606e-06, "loss": 0.5528, "step": 15944 }, { "epoch": 0.5632061601225666, "grad_norm": 1.6003997325897217, "learning_rate": 4.222658520219066e-06, "loss": 0.7866, "step": 15945 }, { "epoch": 0.5632414819262745, "grad_norm": 1.816564679145813, "learning_rate": 4.222093470564225e-06, "loss": 0.7951, "step": 15946 }, { "epoch": 0.5632768037299825, "grad_norm": 1.6338378190994263, "learning_rate": 4.221528431090479e-06, "loss": 0.7728, "step": 15947 }, { "epoch": 0.5633121255336904, "grad_norm": 2.6017251014709473, "learning_rate": 4.220963401805223e-06, "loss": 0.8002, "step": 15948 }, { "epoch": 0.5633474473373983, "grad_norm": 1.7318614721298218, "learning_rate": 4.220398382715851e-06, "loss": 0.7925, "step": 15949 }, { "epoch": 0.5633827691411062, "grad_norm": 1.6010669469833374, "learning_rate": 4.219833373829758e-06, "loss": 0.7685, "step": 15950 }, { "epoch": 0.5634180909448141, "grad_norm": 1.6673753261566162, "learning_rate": 4.2192683751543384e-06, "loss": 0.7699, "step": 15951 }, { "epoch": 0.563453412748522, "grad_norm": 1.6868637800216675, "learning_rate": 4.218703386696989e-06, "loss": 0.7818, "step": 15952 }, { "epoch": 0.5634887345522299, "grad_norm": 1.8106448650360107, "learning_rate": 4.2181384084651014e-06, "loss": 0.8174, "step": 15953 }, { "epoch": 0.5635240563559378, "grad_norm": 1.8377280235290527, "learning_rate": 4.217573440466072e-06, "loss": 0.8239, "step": 15954 }, { "epoch": 0.5635593781596457, "grad_norm": 1.5733517408370972, "learning_rate": 4.217008482707295e-06, "loss": 0.7767, "step": 15955 }, { "epoch": 0.5635946999633537, "grad_norm": 1.7303699254989624, "learning_rate": 4.216443535196162e-06, "loss": 0.8013, "step": 15956 }, { "epoch": 0.5636300217670616, "grad_norm": 1.5774214267730713, "learning_rate": 4.215878597940071e-06, "loss": 0.7669, "step": 15957 }, { "epoch": 0.5636653435707695, "grad_norm": 1.6093250513076782, "learning_rate": 4.215313670946412e-06, "loss": 0.8314, "step": 15958 }, { "epoch": 0.5637006653744774, "grad_norm": 1.9062628746032715, "learning_rate": 4.214748754222579e-06, "loss": 0.788, "step": 15959 }, { "epoch": 0.5637359871781853, "grad_norm": 1.677942156791687, "learning_rate": 4.214183847775969e-06, "loss": 0.7957, "step": 15960 }, { "epoch": 0.5637713089818932, "grad_norm": 1.6688426733016968, "learning_rate": 4.213618951613973e-06, "loss": 0.7963, "step": 15961 }, { "epoch": 0.5638066307856011, "grad_norm": 1.7340470552444458, "learning_rate": 4.213054065743983e-06, "loss": 0.7972, "step": 15962 }, { "epoch": 0.563841952589309, "grad_norm": 1.8041982650756836, "learning_rate": 4.212489190173394e-06, "loss": 0.8083, "step": 15963 }, { "epoch": 0.5638772743930168, "grad_norm": 1.621228575706482, "learning_rate": 4.211924324909599e-06, "loss": 0.761, "step": 15964 }, { "epoch": 0.5639125961967247, "grad_norm": 1.6465067863464355, "learning_rate": 4.211359469959989e-06, "loss": 0.8112, "step": 15965 }, { "epoch": 0.5639479180004326, "grad_norm": 1.7101596593856812, "learning_rate": 4.2107946253319605e-06, "loss": 0.7884, "step": 15966 }, { "epoch": 0.5639832398041406, "grad_norm": 1.6520540714263916, "learning_rate": 4.210229791032902e-06, "loss": 0.8282, "step": 15967 }, { "epoch": 0.5640185616078485, "grad_norm": 1.7316666841506958, "learning_rate": 4.209664967070208e-06, "loss": 0.7814, "step": 15968 }, { "epoch": 0.5640538834115564, "grad_norm": 1.5289782285690308, "learning_rate": 4.20910015345127e-06, "loss": 0.7591, "step": 15969 }, { "epoch": 0.5640892052152643, "grad_norm": 1.7367222309112549, "learning_rate": 4.20853535018348e-06, "loss": 0.7978, "step": 15970 }, { "epoch": 0.5641245270189722, "grad_norm": 1.5732295513153076, "learning_rate": 4.207970557274231e-06, "loss": 0.7741, "step": 15971 }, { "epoch": 0.5641598488226801, "grad_norm": 1.6638740301132202, "learning_rate": 4.207405774730914e-06, "loss": 0.7799, "step": 15972 }, { "epoch": 0.564195170626388, "grad_norm": 1.5797020196914673, "learning_rate": 4.206841002560922e-06, "loss": 0.8027, "step": 15973 }, { "epoch": 0.5642304924300959, "grad_norm": 1.7062187194824219, "learning_rate": 4.206276240771645e-06, "loss": 0.7999, "step": 15974 }, { "epoch": 0.5642658142338038, "grad_norm": 1.8077783584594727, "learning_rate": 4.205711489370476e-06, "loss": 0.8034, "step": 15975 }, { "epoch": 0.5643011360375118, "grad_norm": 1.732038974761963, "learning_rate": 4.205146748364806e-06, "loss": 0.7912, "step": 15976 }, { "epoch": 0.5643364578412197, "grad_norm": 1.6252855062484741, "learning_rate": 4.2045820177620245e-06, "loss": 0.7985, "step": 15977 }, { "epoch": 0.5643717796449276, "grad_norm": 1.6694711446762085, "learning_rate": 4.204017297569526e-06, "loss": 0.8108, "step": 15978 }, { "epoch": 0.5644071014486355, "grad_norm": 1.8232015371322632, "learning_rate": 4.203452587794698e-06, "loss": 0.7817, "step": 15979 }, { "epoch": 0.5644424232523434, "grad_norm": 1.8019111156463623, "learning_rate": 4.202887888444932e-06, "loss": 0.8284, "step": 15980 }, { "epoch": 0.5644777450560513, "grad_norm": 4.444906234741211, "learning_rate": 4.2023231995276214e-06, "loss": 0.8048, "step": 15981 }, { "epoch": 0.5645130668597592, "grad_norm": 1.7095450162887573, "learning_rate": 4.201758521050155e-06, "loss": 0.7687, "step": 15982 }, { "epoch": 0.5645483886634671, "grad_norm": 2.1809446811676025, "learning_rate": 4.2011938530199215e-06, "loss": 0.797, "step": 15983 }, { "epoch": 0.564583710467175, "grad_norm": 1.931614875793457, "learning_rate": 4.200629195444314e-06, "loss": 0.8064, "step": 15984 }, { "epoch": 0.564619032270883, "grad_norm": 1.8316395282745361, "learning_rate": 4.200064548330722e-06, "loss": 0.8003, "step": 15985 }, { "epoch": 0.5646543540745909, "grad_norm": 1.6227627992630005, "learning_rate": 4.199499911686533e-06, "loss": 0.7715, "step": 15986 }, { "epoch": 0.5646896758782988, "grad_norm": 1.8637784719467163, "learning_rate": 4.198935285519139e-06, "loss": 0.7658, "step": 15987 }, { "epoch": 0.5647249976820067, "grad_norm": 1.7840858697891235, "learning_rate": 4.19837066983593e-06, "loss": 0.7726, "step": 15988 }, { "epoch": 0.5647603194857146, "grad_norm": 1.9112179279327393, "learning_rate": 4.197806064644295e-06, "loss": 0.8101, "step": 15989 }, { "epoch": 0.5647956412894224, "grad_norm": 1.6000828742980957, "learning_rate": 4.197241469951623e-06, "loss": 0.7914, "step": 15990 }, { "epoch": 0.5648309630931303, "grad_norm": 1.6567729711532593, "learning_rate": 4.1966768857653035e-06, "loss": 0.7999, "step": 15991 }, { "epoch": 0.5648662848968382, "grad_norm": 1.667016625404358, "learning_rate": 4.196112312092726e-06, "loss": 0.7906, "step": 15992 }, { "epoch": 0.5649016067005461, "grad_norm": 1.6434476375579834, "learning_rate": 4.195547748941279e-06, "loss": 0.8025, "step": 15993 }, { "epoch": 0.564936928504254, "grad_norm": 1.5171362161636353, "learning_rate": 4.194983196318353e-06, "loss": 0.768, "step": 15994 }, { "epoch": 0.5649722503079619, "grad_norm": 1.5209877490997314, "learning_rate": 4.194418654231335e-06, "loss": 0.7959, "step": 15995 }, { "epoch": 0.5650075721116699, "grad_norm": 1.6648184061050415, "learning_rate": 4.193854122687614e-06, "loss": 0.7869, "step": 15996 }, { "epoch": 0.5650428939153778, "grad_norm": 1.773212194442749, "learning_rate": 4.193289601694579e-06, "loss": 0.7844, "step": 15997 }, { "epoch": 0.5650782157190857, "grad_norm": 1.5189135074615479, "learning_rate": 4.192725091259618e-06, "loss": 0.7599, "step": 15998 }, { "epoch": 0.5651135375227936, "grad_norm": 1.7538866996765137, "learning_rate": 4.192160591390119e-06, "loss": 0.7595, "step": 15999 }, { "epoch": 0.5651488593265015, "grad_norm": 1.6025680303573608, "learning_rate": 4.191596102093471e-06, "loss": 0.754, "step": 16000 }, { "epoch": 0.5651841811302094, "grad_norm": 1.5002872943878174, "learning_rate": 4.1910316233770595e-06, "loss": 0.7531, "step": 16001 }, { "epoch": 0.5652195029339173, "grad_norm": 2.1084558963775635, "learning_rate": 4.190467155248277e-06, "loss": 0.8301, "step": 16002 }, { "epoch": 0.5652548247376252, "grad_norm": 1.7048643827438354, "learning_rate": 4.189902697714507e-06, "loss": 0.8117, "step": 16003 }, { "epoch": 0.5652901465413331, "grad_norm": 1.655484676361084, "learning_rate": 4.189338250783139e-06, "loss": 0.815, "step": 16004 }, { "epoch": 0.565325468345041, "grad_norm": 1.7145822048187256, "learning_rate": 4.188773814461559e-06, "loss": 0.8169, "step": 16005 }, { "epoch": 0.565360790148749, "grad_norm": 1.7087199687957764, "learning_rate": 4.188209388757153e-06, "loss": 0.7815, "step": 16006 }, { "epoch": 0.5653961119524569, "grad_norm": 1.6087632179260254, "learning_rate": 4.187644973677311e-06, "loss": 0.7708, "step": 16007 }, { "epoch": 0.5654314337561648, "grad_norm": 1.8377552032470703, "learning_rate": 4.18708056922942e-06, "loss": 0.7908, "step": 16008 }, { "epoch": 0.5654667555598727, "grad_norm": 1.8000069856643677, "learning_rate": 4.186516175420864e-06, "loss": 0.7956, "step": 16009 }, { "epoch": 0.5655020773635806, "grad_norm": 1.6871354579925537, "learning_rate": 4.185951792259032e-06, "loss": 0.8114, "step": 16010 }, { "epoch": 0.5655373991672885, "grad_norm": 1.656211256980896, "learning_rate": 4.18538741975131e-06, "loss": 0.8256, "step": 16011 }, { "epoch": 0.5655727209709964, "grad_norm": 1.639206886291504, "learning_rate": 4.184823057905084e-06, "loss": 0.7763, "step": 16012 }, { "epoch": 0.5656080427747043, "grad_norm": 1.6070369482040405, "learning_rate": 4.184258706727741e-06, "loss": 0.7847, "step": 16013 }, { "epoch": 0.5656433645784122, "grad_norm": 1.680429220199585, "learning_rate": 4.183694366226666e-06, "loss": 0.7938, "step": 16014 }, { "epoch": 0.5656786863821202, "grad_norm": 1.8389763832092285, "learning_rate": 4.183130036409247e-06, "loss": 0.766, "step": 16015 }, { "epoch": 0.565714008185828, "grad_norm": 2.350109815597534, "learning_rate": 4.182565717282868e-06, "loss": 0.779, "step": 16016 }, { "epoch": 0.5657493299895359, "grad_norm": 2.0634379386901855, "learning_rate": 4.1820014088549146e-06, "loss": 0.7871, "step": 16017 }, { "epoch": 0.5657846517932438, "grad_norm": 1.7432852983474731, "learning_rate": 4.181437111132773e-06, "loss": 0.8026, "step": 16018 }, { "epoch": 0.5658199735969517, "grad_norm": 1.6695741415023804, "learning_rate": 4.18087282412383e-06, "loss": 0.7938, "step": 16019 }, { "epoch": 0.5658552954006596, "grad_norm": 1.6908161640167236, "learning_rate": 4.180308547835468e-06, "loss": 0.8162, "step": 16020 }, { "epoch": 0.5658906172043675, "grad_norm": 1.5973063707351685, "learning_rate": 4.179744282275076e-06, "loss": 0.8076, "step": 16021 }, { "epoch": 0.5659259390080754, "grad_norm": 1.6271222829818726, "learning_rate": 4.179180027450035e-06, "loss": 0.7926, "step": 16022 }, { "epoch": 0.5659612608117833, "grad_norm": 1.676164984703064, "learning_rate": 4.178615783367731e-06, "loss": 0.7768, "step": 16023 }, { "epoch": 0.5659965826154912, "grad_norm": 1.471575140953064, "learning_rate": 4.178051550035549e-06, "loss": 0.7881, "step": 16024 }, { "epoch": 0.5660319044191991, "grad_norm": 1.640639305114746, "learning_rate": 4.177487327460875e-06, "loss": 0.7655, "step": 16025 }, { "epoch": 0.5660672262229071, "grad_norm": 1.5772651433944702, "learning_rate": 4.176923115651092e-06, "loss": 0.7812, "step": 16026 }, { "epoch": 0.566102548026615, "grad_norm": 1.7371587753295898, "learning_rate": 4.176358914613583e-06, "loss": 0.7652, "step": 16027 }, { "epoch": 0.5661378698303229, "grad_norm": 2.04822039604187, "learning_rate": 4.175794724355736e-06, "loss": 0.8127, "step": 16028 }, { "epoch": 0.5661731916340308, "grad_norm": 1.8597468137741089, "learning_rate": 4.175230544884931e-06, "loss": 0.779, "step": 16029 }, { "epoch": 0.5662085134377387, "grad_norm": 1.920027732849121, "learning_rate": 4.174666376208554e-06, "loss": 0.8106, "step": 16030 }, { "epoch": 0.5662438352414466, "grad_norm": 1.6510329246520996, "learning_rate": 4.174102218333988e-06, "loss": 0.8033, "step": 16031 }, { "epoch": 0.5662791570451545, "grad_norm": 1.6678894758224487, "learning_rate": 4.173538071268618e-06, "loss": 0.8035, "step": 16032 }, { "epoch": 0.5663144788488624, "grad_norm": 1.8824020624160767, "learning_rate": 4.172973935019825e-06, "loss": 0.7628, "step": 16033 }, { "epoch": 0.5663498006525703, "grad_norm": 1.6542448997497559, "learning_rate": 4.172409809594995e-06, "loss": 0.8128, "step": 16034 }, { "epoch": 0.5663851224562783, "grad_norm": 1.6320832967758179, "learning_rate": 4.1718456950015084e-06, "loss": 0.7605, "step": 16035 }, { "epoch": 0.5664204442599862, "grad_norm": 1.4986367225646973, "learning_rate": 4.171281591246752e-06, "loss": 0.7826, "step": 16036 }, { "epoch": 0.5664557660636941, "grad_norm": 1.6864160299301147, "learning_rate": 4.1707174983381045e-06, "loss": 0.7424, "step": 16037 }, { "epoch": 0.566491087867402, "grad_norm": 1.6718806028366089, "learning_rate": 4.17015341628295e-06, "loss": 0.7975, "step": 16038 }, { "epoch": 0.5665264096711099, "grad_norm": 1.7983399629592896, "learning_rate": 4.169589345088673e-06, "loss": 0.7825, "step": 16039 }, { "epoch": 0.5665617314748178, "grad_norm": 1.6288604736328125, "learning_rate": 4.169025284762656e-06, "loss": 0.7726, "step": 16040 }, { "epoch": 0.5665970532785257, "grad_norm": 1.7059935331344604, "learning_rate": 4.168461235312277e-06, "loss": 0.8087, "step": 16041 }, { "epoch": 0.5666323750822335, "grad_norm": 1.6170426607131958, "learning_rate": 4.167897196744923e-06, "loss": 0.757, "step": 16042 }, { "epoch": 0.5666676968859414, "grad_norm": 1.598280429840088, "learning_rate": 4.167333169067972e-06, "loss": 0.7707, "step": 16043 }, { "epoch": 0.5667030186896493, "grad_norm": 1.734721064567566, "learning_rate": 4.16676915228881e-06, "loss": 0.7864, "step": 16044 }, { "epoch": 0.5667383404933573, "grad_norm": 1.6946189403533936, "learning_rate": 4.166205146414815e-06, "loss": 0.8031, "step": 16045 }, { "epoch": 0.5667736622970652, "grad_norm": 1.748979926109314, "learning_rate": 4.165641151453371e-06, "loss": 0.7541, "step": 16046 }, { "epoch": 0.5668089841007731, "grad_norm": 1.9964072704315186, "learning_rate": 4.165077167411859e-06, "loss": 0.8221, "step": 16047 }, { "epoch": 0.566844305904481, "grad_norm": 0.8827033638954163, "learning_rate": 4.164513194297659e-06, "loss": 0.5867, "step": 16048 }, { "epoch": 0.5668796277081889, "grad_norm": 1.871762990951538, "learning_rate": 4.163949232118154e-06, "loss": 0.8094, "step": 16049 }, { "epoch": 0.5669149495118968, "grad_norm": 1.703757882118225, "learning_rate": 4.163385280880725e-06, "loss": 0.7687, "step": 16050 }, { "epoch": 0.5669502713156047, "grad_norm": 1.6821107864379883, "learning_rate": 4.162821340592751e-06, "loss": 0.7641, "step": 16051 }, { "epoch": 0.5669855931193126, "grad_norm": 2.1401748657226562, "learning_rate": 4.162257411261615e-06, "loss": 0.7793, "step": 16052 }, { "epoch": 0.5670209149230205, "grad_norm": 1.8016836643218994, "learning_rate": 4.161693492894696e-06, "loss": 0.7929, "step": 16053 }, { "epoch": 0.5670562367267284, "grad_norm": 1.6735767126083374, "learning_rate": 4.161129585499374e-06, "loss": 0.794, "step": 16054 }, { "epoch": 0.5670915585304364, "grad_norm": 1.733298659324646, "learning_rate": 4.160565689083032e-06, "loss": 0.7842, "step": 16055 }, { "epoch": 0.5671268803341443, "grad_norm": 1.672955870628357, "learning_rate": 4.160001803653048e-06, "loss": 0.7742, "step": 16056 }, { "epoch": 0.5671622021378522, "grad_norm": 1.5153586864471436, "learning_rate": 4.159437929216802e-06, "loss": 0.7553, "step": 16057 }, { "epoch": 0.5671975239415601, "grad_norm": 1.7129813432693481, "learning_rate": 4.158874065781676e-06, "loss": 0.7928, "step": 16058 }, { "epoch": 0.567232845745268, "grad_norm": 1.682463526725769, "learning_rate": 4.158310213355047e-06, "loss": 0.8079, "step": 16059 }, { "epoch": 0.5672681675489759, "grad_norm": 1.8866761922836304, "learning_rate": 4.157746371944296e-06, "loss": 0.814, "step": 16060 }, { "epoch": 0.5673034893526838, "grad_norm": 1.9640376567840576, "learning_rate": 4.1571825415568e-06, "loss": 0.7888, "step": 16061 }, { "epoch": 0.5673388111563917, "grad_norm": 2.22049880027771, "learning_rate": 4.156618722199943e-06, "loss": 0.764, "step": 16062 }, { "epoch": 0.5673741329600996, "grad_norm": 1.852393388748169, "learning_rate": 4.156054913881102e-06, "loss": 0.8378, "step": 16063 }, { "epoch": 0.5674094547638076, "grad_norm": 1.525286078453064, "learning_rate": 4.155491116607653e-06, "loss": 0.7172, "step": 16064 }, { "epoch": 0.5674447765675155, "grad_norm": 1.6813695430755615, "learning_rate": 4.1549273303869785e-06, "loss": 0.7839, "step": 16065 }, { "epoch": 0.5674800983712234, "grad_norm": 1.5661935806274414, "learning_rate": 4.154363555226457e-06, "loss": 0.8051, "step": 16066 }, { "epoch": 0.5675154201749313, "grad_norm": 1.7398499250411987, "learning_rate": 4.153799791133466e-06, "loss": 0.8139, "step": 16067 }, { "epoch": 0.5675507419786391, "grad_norm": 1.8459084033966064, "learning_rate": 4.1532360381153845e-06, "loss": 0.7837, "step": 16068 }, { "epoch": 0.567586063782347, "grad_norm": 0.9143821001052856, "learning_rate": 4.152672296179589e-06, "loss": 0.5828, "step": 16069 }, { "epoch": 0.5676213855860549, "grad_norm": 1.7720913887023926, "learning_rate": 4.152108565333461e-06, "loss": 0.7598, "step": 16070 }, { "epoch": 0.5676567073897628, "grad_norm": 1.5578114986419678, "learning_rate": 4.151544845584376e-06, "loss": 0.7481, "step": 16071 }, { "epoch": 0.5676920291934707, "grad_norm": 1.7756273746490479, "learning_rate": 4.150981136939713e-06, "loss": 0.7933, "step": 16072 }, { "epoch": 0.5677273509971786, "grad_norm": 1.6466429233551025, "learning_rate": 4.150417439406849e-06, "loss": 0.7695, "step": 16073 }, { "epoch": 0.5677626728008865, "grad_norm": 1.7576396465301514, "learning_rate": 4.149853752993162e-06, "loss": 0.7905, "step": 16074 }, { "epoch": 0.5677979946045945, "grad_norm": 1.9882967472076416, "learning_rate": 4.149290077706029e-06, "loss": 0.7717, "step": 16075 }, { "epoch": 0.5678333164083024, "grad_norm": 1.5604256391525269, "learning_rate": 4.148726413552827e-06, "loss": 0.7683, "step": 16076 }, { "epoch": 0.5678686382120103, "grad_norm": 1.6691018342971802, "learning_rate": 4.1481627605409355e-06, "loss": 0.784, "step": 16077 }, { "epoch": 0.5679039600157182, "grad_norm": 1.778016448020935, "learning_rate": 4.147599118677729e-06, "loss": 0.8044, "step": 16078 }, { "epoch": 0.5679392818194261, "grad_norm": 1.727944016456604, "learning_rate": 4.1470354879705845e-06, "loss": 0.7815, "step": 16079 }, { "epoch": 0.567974603623134, "grad_norm": 1.9990367889404297, "learning_rate": 4.146471868426879e-06, "loss": 0.7622, "step": 16080 }, { "epoch": 0.5680099254268419, "grad_norm": 1.4507099390029907, "learning_rate": 4.14590826005399e-06, "loss": 0.7633, "step": 16081 }, { "epoch": 0.5680452472305498, "grad_norm": 1.6122725009918213, "learning_rate": 4.145344662859292e-06, "loss": 0.7812, "step": 16082 }, { "epoch": 0.5680805690342577, "grad_norm": 1.562382459640503, "learning_rate": 4.144781076850164e-06, "loss": 0.7884, "step": 16083 }, { "epoch": 0.5681158908379657, "grad_norm": 1.872300386428833, "learning_rate": 4.144217502033979e-06, "loss": 0.7948, "step": 16084 }, { "epoch": 0.5681512126416736, "grad_norm": 1.674351453781128, "learning_rate": 4.143653938418114e-06, "loss": 0.7905, "step": 16085 }, { "epoch": 0.5681865344453815, "grad_norm": 1.5949991941452026, "learning_rate": 4.1430903860099465e-06, "loss": 0.7655, "step": 16086 }, { "epoch": 0.5682218562490894, "grad_norm": 1.6913702487945557, "learning_rate": 4.142526844816852e-06, "loss": 0.8061, "step": 16087 }, { "epoch": 0.5682571780527973, "grad_norm": 1.7179193496704102, "learning_rate": 4.141963314846202e-06, "loss": 0.8054, "step": 16088 }, { "epoch": 0.5682924998565052, "grad_norm": 1.6790478229522705, "learning_rate": 4.141399796105377e-06, "loss": 0.7983, "step": 16089 }, { "epoch": 0.5683278216602131, "grad_norm": 1.6559011936187744, "learning_rate": 4.14083628860175e-06, "loss": 0.7617, "step": 16090 }, { "epoch": 0.568363143463921, "grad_norm": 1.653123378753662, "learning_rate": 4.140272792342695e-06, "loss": 0.7923, "step": 16091 }, { "epoch": 0.5683984652676289, "grad_norm": 1.6030155420303345, "learning_rate": 4.139709307335589e-06, "loss": 0.7653, "step": 16092 }, { "epoch": 0.5684337870713368, "grad_norm": 1.8392770290374756, "learning_rate": 4.139145833587805e-06, "loss": 0.7913, "step": 16093 }, { "epoch": 0.5684691088750448, "grad_norm": 1.8063907623291016, "learning_rate": 4.1385823711067196e-06, "loss": 0.784, "step": 16094 }, { "epoch": 0.5685044306787526, "grad_norm": 1.5136871337890625, "learning_rate": 4.138018919899707e-06, "loss": 0.7446, "step": 16095 }, { "epoch": 0.5685397524824605, "grad_norm": 1.7933201789855957, "learning_rate": 4.13745547997414e-06, "loss": 0.7931, "step": 16096 }, { "epoch": 0.5685750742861684, "grad_norm": 1.627997636795044, "learning_rate": 4.136892051337394e-06, "loss": 0.8189, "step": 16097 }, { "epoch": 0.5686103960898763, "grad_norm": 1.8220499753952026, "learning_rate": 4.13632863399684e-06, "loss": 0.8426, "step": 16098 }, { "epoch": 0.5686457178935842, "grad_norm": 1.750354290008545, "learning_rate": 4.135765227959857e-06, "loss": 0.8171, "step": 16099 }, { "epoch": 0.5686810396972921, "grad_norm": 1.5404205322265625, "learning_rate": 4.135201833233816e-06, "loss": 0.7743, "step": 16100 }, { "epoch": 0.568716361501, "grad_norm": 1.7091690301895142, "learning_rate": 4.134638449826089e-06, "loss": 0.7727, "step": 16101 }, { "epoch": 0.5687516833047079, "grad_norm": 1.7143073081970215, "learning_rate": 4.134075077744053e-06, "loss": 0.7859, "step": 16102 }, { "epoch": 0.5687870051084158, "grad_norm": 1.6272567510604858, "learning_rate": 4.133511716995077e-06, "loss": 0.7813, "step": 16103 }, { "epoch": 0.5688223269121238, "grad_norm": 1.7355282306671143, "learning_rate": 4.13294836758654e-06, "loss": 0.7839, "step": 16104 }, { "epoch": 0.5688576487158317, "grad_norm": 1.6909019947052002, "learning_rate": 4.132385029525811e-06, "loss": 0.7898, "step": 16105 }, { "epoch": 0.5688929705195396, "grad_norm": 1.7295643091201782, "learning_rate": 4.131821702820263e-06, "loss": 0.8025, "step": 16106 }, { "epoch": 0.5689282923232475, "grad_norm": 1.671777367591858, "learning_rate": 4.13125838747727e-06, "loss": 0.8249, "step": 16107 }, { "epoch": 0.5689636141269554, "grad_norm": 1.6986055374145508, "learning_rate": 4.130695083504203e-06, "loss": 0.7914, "step": 16108 }, { "epoch": 0.5689989359306633, "grad_norm": 1.583884835243225, "learning_rate": 4.130131790908436e-06, "loss": 0.7912, "step": 16109 }, { "epoch": 0.5690342577343712, "grad_norm": 1.757401704788208, "learning_rate": 4.129568509697341e-06, "loss": 0.7727, "step": 16110 }, { "epoch": 0.5690695795380791, "grad_norm": 0.9570844769477844, "learning_rate": 4.129005239878289e-06, "loss": 0.6046, "step": 16111 }, { "epoch": 0.569104901341787, "grad_norm": 1.5734697580337524, "learning_rate": 4.128441981458652e-06, "loss": 0.7553, "step": 16112 }, { "epoch": 0.569140223145495, "grad_norm": 1.680619716644287, "learning_rate": 4.1278787344458045e-06, "loss": 0.7834, "step": 16113 }, { "epoch": 0.5691755449492029, "grad_norm": 1.612904667854309, "learning_rate": 4.127315498847116e-06, "loss": 0.7852, "step": 16114 }, { "epoch": 0.5692108667529108, "grad_norm": 2.1182596683502197, "learning_rate": 4.126752274669958e-06, "loss": 0.8205, "step": 16115 }, { "epoch": 0.5692461885566187, "grad_norm": 1.797119140625, "learning_rate": 4.1261890619217e-06, "loss": 0.8107, "step": 16116 }, { "epoch": 0.5692815103603266, "grad_norm": 1.7175642251968384, "learning_rate": 4.125625860609717e-06, "loss": 0.7837, "step": 16117 }, { "epoch": 0.5693168321640345, "grad_norm": 1.7871266603469849, "learning_rate": 4.125062670741379e-06, "loss": 0.7875, "step": 16118 }, { "epoch": 0.5693521539677424, "grad_norm": 1.709123969078064, "learning_rate": 4.124499492324055e-06, "loss": 0.8097, "step": 16119 }, { "epoch": 0.5693874757714503, "grad_norm": 1.6014065742492676, "learning_rate": 4.123936325365118e-06, "loss": 0.7806, "step": 16120 }, { "epoch": 0.5694227975751581, "grad_norm": 1.8575350046157837, "learning_rate": 4.123373169871937e-06, "loss": 0.7824, "step": 16121 }, { "epoch": 0.569458119378866, "grad_norm": 1.9432594776153564, "learning_rate": 4.122810025851882e-06, "loss": 0.7985, "step": 16122 }, { "epoch": 0.569493441182574, "grad_norm": 1.959903597831726, "learning_rate": 4.122246893312326e-06, "loss": 0.788, "step": 16123 }, { "epoch": 0.5695287629862819, "grad_norm": 1.8117074966430664, "learning_rate": 4.121683772260639e-06, "loss": 0.7796, "step": 16124 }, { "epoch": 0.5695640847899898, "grad_norm": 1.6514956951141357, "learning_rate": 4.121120662704187e-06, "loss": 0.7846, "step": 16125 }, { "epoch": 0.5695994065936977, "grad_norm": 1.6430704593658447, "learning_rate": 4.120557564650344e-06, "loss": 0.7855, "step": 16126 }, { "epoch": 0.5696347283974056, "grad_norm": 1.5416444540023804, "learning_rate": 4.119994478106477e-06, "loss": 0.7859, "step": 16127 }, { "epoch": 0.5696700502011135, "grad_norm": 1.6527684926986694, "learning_rate": 4.119431403079958e-06, "loss": 0.7454, "step": 16128 }, { "epoch": 0.5697053720048214, "grad_norm": 2.1204590797424316, "learning_rate": 4.118868339578155e-06, "loss": 0.804, "step": 16129 }, { "epoch": 0.5697406938085293, "grad_norm": 1.6290802955627441, "learning_rate": 4.1183052876084376e-06, "loss": 0.7851, "step": 16130 }, { "epoch": 0.5697760156122372, "grad_norm": 1.6705137491226196, "learning_rate": 4.117742247178175e-06, "loss": 0.7406, "step": 16131 }, { "epoch": 0.5698113374159451, "grad_norm": 1.5950117111206055, "learning_rate": 4.117179218294737e-06, "loss": 0.7896, "step": 16132 }, { "epoch": 0.569846659219653, "grad_norm": 1.8443043231964111, "learning_rate": 4.1166162009654905e-06, "loss": 0.8, "step": 16133 }, { "epoch": 0.569881981023361, "grad_norm": 1.504660964012146, "learning_rate": 4.116053195197806e-06, "loss": 0.7549, "step": 16134 }, { "epoch": 0.5699173028270689, "grad_norm": 1.6946358680725098, "learning_rate": 4.115490200999049e-06, "loss": 0.7626, "step": 16135 }, { "epoch": 0.5699526246307768, "grad_norm": 1.6616885662078857, "learning_rate": 4.114927218376591e-06, "loss": 0.8137, "step": 16136 }, { "epoch": 0.5699879464344847, "grad_norm": 1.8022582530975342, "learning_rate": 4.114364247337799e-06, "loss": 0.7936, "step": 16137 }, { "epoch": 0.5700232682381926, "grad_norm": 1.7517262697219849, "learning_rate": 4.1138012878900415e-06, "loss": 0.7728, "step": 16138 }, { "epoch": 0.5700585900419005, "grad_norm": 1.8810244798660278, "learning_rate": 4.113238340040686e-06, "loss": 0.7736, "step": 16139 }, { "epoch": 0.5700939118456084, "grad_norm": 1.6682484149932861, "learning_rate": 4.1126754037971e-06, "loss": 0.7454, "step": 16140 }, { "epoch": 0.5701292336493163, "grad_norm": 1.6685223579406738, "learning_rate": 4.112112479166652e-06, "loss": 0.7901, "step": 16141 }, { "epoch": 0.5701645554530242, "grad_norm": 1.7325589656829834, "learning_rate": 4.1115495661567095e-06, "loss": 0.8013, "step": 16142 }, { "epoch": 0.5701998772567322, "grad_norm": 1.8073055744171143, "learning_rate": 4.110986664774638e-06, "loss": 0.7887, "step": 16143 }, { "epoch": 0.5702351990604401, "grad_norm": 1.5162990093231201, "learning_rate": 4.110423775027807e-06, "loss": 0.7258, "step": 16144 }, { "epoch": 0.570270520864148, "grad_norm": 1.6612646579742432, "learning_rate": 4.109860896923583e-06, "loss": 0.7927, "step": 16145 }, { "epoch": 0.5703058426678559, "grad_norm": 1.7734767198562622, "learning_rate": 4.10929803046933e-06, "loss": 0.7757, "step": 16146 }, { "epoch": 0.5703411644715637, "grad_norm": 1.6344832181930542, "learning_rate": 4.108735175672419e-06, "loss": 0.829, "step": 16147 }, { "epoch": 0.5703764862752716, "grad_norm": 1.62393319606781, "learning_rate": 4.108172332540215e-06, "loss": 0.8061, "step": 16148 }, { "epoch": 0.5704118080789795, "grad_norm": 1.5654734373092651, "learning_rate": 4.107609501080081e-06, "loss": 0.7402, "step": 16149 }, { "epoch": 0.5704471298826874, "grad_norm": 1.8611929416656494, "learning_rate": 4.107046681299388e-06, "loss": 0.7934, "step": 16150 }, { "epoch": 0.5704824516863953, "grad_norm": 1.8964914083480835, "learning_rate": 4.106483873205501e-06, "loss": 0.7708, "step": 16151 }, { "epoch": 0.5705177734901032, "grad_norm": 1.6052054166793823, "learning_rate": 4.105921076805785e-06, "loss": 0.7749, "step": 16152 }, { "epoch": 0.5705530952938112, "grad_norm": 1.641014814376831, "learning_rate": 4.105358292107604e-06, "loss": 0.7601, "step": 16153 }, { "epoch": 0.5705884170975191, "grad_norm": 1.6684200763702393, "learning_rate": 4.104795519118327e-06, "loss": 0.7944, "step": 16154 }, { "epoch": 0.570623738901227, "grad_norm": 1.682998776435852, "learning_rate": 4.1042327578453174e-06, "loss": 0.8046, "step": 16155 }, { "epoch": 0.5706590607049349, "grad_norm": 3.429471492767334, "learning_rate": 4.1036700082959404e-06, "loss": 0.7765, "step": 16156 }, { "epoch": 0.5706943825086428, "grad_norm": 1.7916901111602783, "learning_rate": 4.103107270477563e-06, "loss": 0.8105, "step": 16157 }, { "epoch": 0.5707297043123507, "grad_norm": 1.7992513179779053, "learning_rate": 4.102544544397549e-06, "loss": 0.7994, "step": 16158 }, { "epoch": 0.5707650261160586, "grad_norm": 1.6517096757888794, "learning_rate": 4.101981830063262e-06, "loss": 0.8002, "step": 16159 }, { "epoch": 0.5708003479197665, "grad_norm": 1.7708401679992676, "learning_rate": 4.101419127482069e-06, "loss": 0.7866, "step": 16160 }, { "epoch": 0.5708356697234744, "grad_norm": 1.686238169670105, "learning_rate": 4.100856436661333e-06, "loss": 0.7795, "step": 16161 }, { "epoch": 0.5708709915271823, "grad_norm": 1.9574488401412964, "learning_rate": 4.100293757608419e-06, "loss": 0.7976, "step": 16162 }, { "epoch": 0.5709063133308903, "grad_norm": 1.8622114658355713, "learning_rate": 4.099731090330692e-06, "loss": 0.8062, "step": 16163 }, { "epoch": 0.5709416351345982, "grad_norm": 1.6581116914749146, "learning_rate": 4.099168434835514e-06, "loss": 0.788, "step": 16164 }, { "epoch": 0.5709769569383061, "grad_norm": 1.8265570402145386, "learning_rate": 4.098605791130251e-06, "loss": 0.7818, "step": 16165 }, { "epoch": 0.571012278742014, "grad_norm": 1.7191189527511597, "learning_rate": 4.098043159222267e-06, "loss": 0.8101, "step": 16166 }, { "epoch": 0.5710476005457219, "grad_norm": 2.0605227947235107, "learning_rate": 4.097480539118922e-06, "loss": 0.8017, "step": 16167 }, { "epoch": 0.5710829223494298, "grad_norm": 1.779439926147461, "learning_rate": 4.096917930827584e-06, "loss": 0.8117, "step": 16168 }, { "epoch": 0.5711182441531377, "grad_norm": 2.0181639194488525, "learning_rate": 4.096355334355616e-06, "loss": 0.8, "step": 16169 }, { "epoch": 0.5711535659568456, "grad_norm": 1.6406946182250977, "learning_rate": 4.095792749710377e-06, "loss": 0.8219, "step": 16170 }, { "epoch": 0.5711888877605535, "grad_norm": 1.6468181610107422, "learning_rate": 4.095230176899233e-06, "loss": 0.7883, "step": 16171 }, { "epoch": 0.5712242095642615, "grad_norm": 1.5821003913879395, "learning_rate": 4.0946676159295466e-06, "loss": 0.7728, "step": 16172 }, { "epoch": 0.5712595313679693, "grad_norm": 1.7475162744522095, "learning_rate": 4.09410506680868e-06, "loss": 0.7748, "step": 16173 }, { "epoch": 0.5712948531716772, "grad_norm": 1.639655351638794, "learning_rate": 4.093542529543995e-06, "loss": 0.7808, "step": 16174 }, { "epoch": 0.5713301749753851, "grad_norm": 1.0357838869094849, "learning_rate": 4.092980004142857e-06, "loss": 0.5685, "step": 16175 }, { "epoch": 0.571365496779093, "grad_norm": 1.5949000120162964, "learning_rate": 4.092417490612625e-06, "loss": 0.7505, "step": 16176 }, { "epoch": 0.5714008185828009, "grad_norm": 1.6638387441635132, "learning_rate": 4.091854988960661e-06, "loss": 0.7888, "step": 16177 }, { "epoch": 0.5714361403865088, "grad_norm": 1.6535818576812744, "learning_rate": 4.091292499194331e-06, "loss": 0.7944, "step": 16178 }, { "epoch": 0.5714714621902167, "grad_norm": 1.621529459953308, "learning_rate": 4.0907300213209925e-06, "loss": 0.8011, "step": 16179 }, { "epoch": 0.5715067839939246, "grad_norm": 1.714733600616455, "learning_rate": 4.090167555348007e-06, "loss": 0.8069, "step": 16180 }, { "epoch": 0.5715421057976325, "grad_norm": 1.7145313024520874, "learning_rate": 4.08960510128274e-06, "loss": 0.8003, "step": 16181 }, { "epoch": 0.5715774276013404, "grad_norm": 1.5710612535476685, "learning_rate": 4.08904265913255e-06, "loss": 0.7702, "step": 16182 }, { "epoch": 0.5716127494050484, "grad_norm": 1.857694387435913, "learning_rate": 4.088480228904797e-06, "loss": 0.7862, "step": 16183 }, { "epoch": 0.5716480712087563, "grad_norm": 1.6243101358413696, "learning_rate": 4.087917810606844e-06, "loss": 0.7487, "step": 16184 }, { "epoch": 0.5716833930124642, "grad_norm": 1.746025800704956, "learning_rate": 4.087355404246051e-06, "loss": 0.7971, "step": 16185 }, { "epoch": 0.5717187148161721, "grad_norm": 1.6915520429611206, "learning_rate": 4.0867930098297805e-06, "loss": 0.7725, "step": 16186 }, { "epoch": 0.57175403661988, "grad_norm": 1.7002767324447632, "learning_rate": 4.08623062736539e-06, "loss": 0.8483, "step": 16187 }, { "epoch": 0.5717893584235879, "grad_norm": 1.6189229488372803, "learning_rate": 4.085668256860244e-06, "loss": 0.7713, "step": 16188 }, { "epoch": 0.5718246802272958, "grad_norm": 1.8767229318618774, "learning_rate": 4.085105898321697e-06, "loss": 0.805, "step": 16189 }, { "epoch": 0.5718600020310037, "grad_norm": 1.8986347913742065, "learning_rate": 4.084543551757112e-06, "loss": 0.7834, "step": 16190 }, { "epoch": 0.5718953238347116, "grad_norm": 1.587965726852417, "learning_rate": 4.08398121717385e-06, "loss": 0.79, "step": 16191 }, { "epoch": 0.5719306456384196, "grad_norm": 2.161147117614746, "learning_rate": 4.08341889457927e-06, "loss": 0.8152, "step": 16192 }, { "epoch": 0.5719659674421275, "grad_norm": 1.5737850666046143, "learning_rate": 4.082856583980729e-06, "loss": 0.7966, "step": 16193 }, { "epoch": 0.5720012892458354, "grad_norm": 1.5148087739944458, "learning_rate": 4.082294285385591e-06, "loss": 0.7639, "step": 16194 }, { "epoch": 0.5720366110495433, "grad_norm": 1.6632330417633057, "learning_rate": 4.081731998801211e-06, "loss": 0.7682, "step": 16195 }, { "epoch": 0.5720719328532512, "grad_norm": 1.5309622287750244, "learning_rate": 4.081169724234951e-06, "loss": 0.7465, "step": 16196 }, { "epoch": 0.5721072546569591, "grad_norm": 1.7559335231781006, "learning_rate": 4.080607461694169e-06, "loss": 0.8044, "step": 16197 }, { "epoch": 0.572142576460667, "grad_norm": 1.6751047372817993, "learning_rate": 4.080045211186222e-06, "loss": 0.8283, "step": 16198 }, { "epoch": 0.5721778982643748, "grad_norm": 1.7245699167251587, "learning_rate": 4.079482972718473e-06, "loss": 0.771, "step": 16199 }, { "epoch": 0.5722132200680827, "grad_norm": 1.6936372518539429, "learning_rate": 4.078920746298277e-06, "loss": 0.8038, "step": 16200 }, { "epoch": 0.5722485418717906, "grad_norm": 1.6818604469299316, "learning_rate": 4.078358531932992e-06, "loss": 0.7992, "step": 16201 }, { "epoch": 0.5722838636754985, "grad_norm": 1.874786376953125, "learning_rate": 4.077796329629978e-06, "loss": 0.8073, "step": 16202 }, { "epoch": 0.5723191854792065, "grad_norm": 1.7368197441101074, "learning_rate": 4.077234139396593e-06, "loss": 0.8083, "step": 16203 }, { "epoch": 0.5723545072829144, "grad_norm": 1.818865180015564, "learning_rate": 4.076671961240193e-06, "loss": 0.7606, "step": 16204 }, { "epoch": 0.5723898290866223, "grad_norm": 1.620039701461792, "learning_rate": 4.076109795168138e-06, "loss": 0.822, "step": 16205 }, { "epoch": 0.5724251508903302, "grad_norm": 1.722010850906372, "learning_rate": 4.075547641187784e-06, "loss": 0.8069, "step": 16206 }, { "epoch": 0.5724604726940381, "grad_norm": 1.6114270687103271, "learning_rate": 4.0749854993064895e-06, "loss": 0.7589, "step": 16207 }, { "epoch": 0.572495794497746, "grad_norm": 1.7539501190185547, "learning_rate": 4.074423369531608e-06, "loss": 0.7932, "step": 16208 }, { "epoch": 0.5725311163014539, "grad_norm": 1.7428170442581177, "learning_rate": 4.073861251870502e-06, "loss": 0.758, "step": 16209 }, { "epoch": 0.5725664381051618, "grad_norm": 1.7924482822418213, "learning_rate": 4.0732991463305255e-06, "loss": 0.784, "step": 16210 }, { "epoch": 0.5726017599088697, "grad_norm": 1.6362512111663818, "learning_rate": 4.072737052919034e-06, "loss": 0.829, "step": 16211 }, { "epoch": 0.5726370817125777, "grad_norm": 1.5292819738388062, "learning_rate": 4.072174971643387e-06, "loss": 0.7394, "step": 16212 }, { "epoch": 0.5726724035162856, "grad_norm": 1.9181431531906128, "learning_rate": 4.071612902510939e-06, "loss": 0.8031, "step": 16213 }, { "epoch": 0.5727077253199935, "grad_norm": 1.6926876306533813, "learning_rate": 4.071050845529047e-06, "loss": 0.7639, "step": 16214 }, { "epoch": 0.5727430471237014, "grad_norm": 1.912814736366272, "learning_rate": 4.070488800705067e-06, "loss": 0.7596, "step": 16215 }, { "epoch": 0.5727783689274093, "grad_norm": 1.5924869775772095, "learning_rate": 4.069926768046356e-06, "loss": 0.76, "step": 16216 }, { "epoch": 0.5728136907311172, "grad_norm": 1.7385741472244263, "learning_rate": 4.069364747560266e-06, "loss": 0.8062, "step": 16217 }, { "epoch": 0.5728490125348251, "grad_norm": 1.7259480953216553, "learning_rate": 4.068802739254157e-06, "loss": 0.7855, "step": 16218 }, { "epoch": 0.572884334338533, "grad_norm": 2.0777039527893066, "learning_rate": 4.0682407431353825e-06, "loss": 0.8413, "step": 16219 }, { "epoch": 0.5729196561422409, "grad_norm": 2.1006925106048584, "learning_rate": 4.067678759211298e-06, "loss": 0.7971, "step": 16220 }, { "epoch": 0.5729549779459489, "grad_norm": 1.7056015729904175, "learning_rate": 4.067116787489259e-06, "loss": 0.8336, "step": 16221 }, { "epoch": 0.5729902997496568, "grad_norm": 1.7321072816848755, "learning_rate": 4.066554827976619e-06, "loss": 0.7882, "step": 16222 }, { "epoch": 0.5730256215533647, "grad_norm": 1.6050708293914795, "learning_rate": 4.065992880680735e-06, "loss": 0.7714, "step": 16223 }, { "epoch": 0.5730609433570726, "grad_norm": 1.022134780883789, "learning_rate": 4.06543094560896e-06, "loss": 0.6121, "step": 16224 }, { "epoch": 0.5730962651607804, "grad_norm": 1.7530157566070557, "learning_rate": 4.064869022768651e-06, "loss": 0.7682, "step": 16225 }, { "epoch": 0.5731315869644883, "grad_norm": 1.853173017501831, "learning_rate": 4.064307112167159e-06, "loss": 0.7915, "step": 16226 }, { "epoch": 0.5731669087681962, "grad_norm": 1.6093381643295288, "learning_rate": 4.063745213811839e-06, "loss": 0.7724, "step": 16227 }, { "epoch": 0.5732022305719041, "grad_norm": 1.5572720766067505, "learning_rate": 4.063183327710046e-06, "loss": 0.7715, "step": 16228 }, { "epoch": 0.573237552375612, "grad_norm": 1.6389729976654053, "learning_rate": 4.062621453869133e-06, "loss": 0.7655, "step": 16229 }, { "epoch": 0.5732728741793199, "grad_norm": 1.8098106384277344, "learning_rate": 4.062059592296456e-06, "loss": 0.7845, "step": 16230 }, { "epoch": 0.5733081959830278, "grad_norm": 1.750907301902771, "learning_rate": 4.0614977429993655e-06, "loss": 0.7606, "step": 16231 }, { "epoch": 0.5733435177867358, "grad_norm": 1.6872234344482422, "learning_rate": 4.060935905985215e-06, "loss": 0.7836, "step": 16232 }, { "epoch": 0.5733788395904437, "grad_norm": 1.8899592161178589, "learning_rate": 4.06037408126136e-06, "loss": 0.7847, "step": 16233 }, { "epoch": 0.5734141613941516, "grad_norm": 1.853272795677185, "learning_rate": 4.059812268835153e-06, "loss": 0.7963, "step": 16234 }, { "epoch": 0.5734494831978595, "grad_norm": 1.955958604812622, "learning_rate": 4.059250468713945e-06, "loss": 0.7877, "step": 16235 }, { "epoch": 0.5734848050015674, "grad_norm": 1.597081184387207, "learning_rate": 4.0586886809050905e-06, "loss": 0.7797, "step": 16236 }, { "epoch": 0.5735201268052753, "grad_norm": 1.7240452766418457, "learning_rate": 4.058126905415942e-06, "loss": 0.7601, "step": 16237 }, { "epoch": 0.5735554486089832, "grad_norm": 1.5792200565338135, "learning_rate": 4.0575651422538506e-06, "loss": 0.7795, "step": 16238 }, { "epoch": 0.5735907704126911, "grad_norm": 1.7543131113052368, "learning_rate": 4.057003391426171e-06, "loss": 0.7781, "step": 16239 }, { "epoch": 0.573626092216399, "grad_norm": 1.8643726110458374, "learning_rate": 4.056441652940254e-06, "loss": 0.7831, "step": 16240 }, { "epoch": 0.573661414020107, "grad_norm": 1.7258572578430176, "learning_rate": 4.055879926803449e-06, "loss": 0.8274, "step": 16241 }, { "epoch": 0.5736967358238149, "grad_norm": 2.298813819885254, "learning_rate": 4.055318213023111e-06, "loss": 0.8017, "step": 16242 }, { "epoch": 0.5737320576275228, "grad_norm": 1.7587859630584717, "learning_rate": 4.0547565116065925e-06, "loss": 0.772, "step": 16243 }, { "epoch": 0.5737673794312307, "grad_norm": 1.6582274436950684, "learning_rate": 4.054194822561242e-06, "loss": 0.7953, "step": 16244 }, { "epoch": 0.5738027012349386, "grad_norm": 1.6637705564498901, "learning_rate": 4.05363314589441e-06, "loss": 0.8128, "step": 16245 }, { "epoch": 0.5738380230386465, "grad_norm": 2.0908806324005127, "learning_rate": 4.053071481613451e-06, "loss": 0.7646, "step": 16246 }, { "epoch": 0.5738733448423544, "grad_norm": 1.7983886003494263, "learning_rate": 4.052509829725715e-06, "loss": 0.7748, "step": 16247 }, { "epoch": 0.5739086666460623, "grad_norm": 1.8311082124710083, "learning_rate": 4.0519481902385505e-06, "loss": 0.8107, "step": 16248 }, { "epoch": 0.5739439884497702, "grad_norm": 1.6591405868530273, "learning_rate": 4.051386563159311e-06, "loss": 0.782, "step": 16249 }, { "epoch": 0.5739793102534781, "grad_norm": 1.7913585901260376, "learning_rate": 4.050824948495346e-06, "loss": 0.7913, "step": 16250 }, { "epoch": 0.574014632057186, "grad_norm": 1.8388358354568481, "learning_rate": 4.0502633462540055e-06, "loss": 0.7881, "step": 16251 }, { "epoch": 0.5740499538608939, "grad_norm": 1.6763451099395752, "learning_rate": 4.04970175644264e-06, "loss": 0.8208, "step": 16252 }, { "epoch": 0.5740852756646018, "grad_norm": 1.6931275129318237, "learning_rate": 4.049140179068599e-06, "loss": 0.7837, "step": 16253 }, { "epoch": 0.5741205974683097, "grad_norm": 1.883079171180725, "learning_rate": 4.048578614139231e-06, "loss": 0.8057, "step": 16254 }, { "epoch": 0.5741559192720176, "grad_norm": 1.7151398658752441, "learning_rate": 4.048017061661889e-06, "loss": 0.8216, "step": 16255 }, { "epoch": 0.5741912410757255, "grad_norm": 1.996363639831543, "learning_rate": 4.04745552164392e-06, "loss": 0.8219, "step": 16256 }, { "epoch": 0.5742265628794334, "grad_norm": 1.5860790014266968, "learning_rate": 4.046893994092676e-06, "loss": 0.7877, "step": 16257 }, { "epoch": 0.5742618846831413, "grad_norm": 2.776733636856079, "learning_rate": 4.046332479015503e-06, "loss": 0.7572, "step": 16258 }, { "epoch": 0.5742972064868492, "grad_norm": 1.7502552270889282, "learning_rate": 4.04577097641975e-06, "loss": 0.7818, "step": 16259 }, { "epoch": 0.5743325282905571, "grad_norm": 1.704860806465149, "learning_rate": 4.045209486312768e-06, "loss": 0.7919, "step": 16260 }, { "epoch": 0.574367850094265, "grad_norm": 1.6780105829238892, "learning_rate": 4.0446480087019056e-06, "loss": 0.7669, "step": 16261 }, { "epoch": 0.574403171897973, "grad_norm": 1.7398396730422974, "learning_rate": 4.044086543594511e-06, "loss": 0.7845, "step": 16262 }, { "epoch": 0.5744384937016809, "grad_norm": 1.668841004371643, "learning_rate": 4.0435250909979315e-06, "loss": 0.8173, "step": 16263 }, { "epoch": 0.5744738155053888, "grad_norm": 1.6137088537216187, "learning_rate": 4.0429636509195146e-06, "loss": 0.7612, "step": 16264 }, { "epoch": 0.5745091373090967, "grad_norm": 1.9646095037460327, "learning_rate": 4.042402223366611e-06, "loss": 0.8233, "step": 16265 }, { "epoch": 0.5745444591128046, "grad_norm": 1.7983875274658203, "learning_rate": 4.041840808346564e-06, "loss": 0.7598, "step": 16266 }, { "epoch": 0.5745797809165125, "grad_norm": 1.7154031991958618, "learning_rate": 4.041279405866728e-06, "loss": 0.7739, "step": 16267 }, { "epoch": 0.5746151027202204, "grad_norm": 1.6034387350082397, "learning_rate": 4.040718015934445e-06, "loss": 0.7918, "step": 16268 }, { "epoch": 0.5746504245239283, "grad_norm": 1.7073032855987549, "learning_rate": 4.040156638557064e-06, "loss": 0.825, "step": 16269 }, { "epoch": 0.5746857463276362, "grad_norm": 1.555027961730957, "learning_rate": 4.0395952737419335e-06, "loss": 0.7671, "step": 16270 }, { "epoch": 0.5747210681313442, "grad_norm": 1.6701135635375977, "learning_rate": 4.0390339214964e-06, "loss": 0.7831, "step": 16271 }, { "epoch": 0.5747563899350521, "grad_norm": 1.7556558847427368, "learning_rate": 4.038472581827808e-06, "loss": 0.7849, "step": 16272 }, { "epoch": 0.57479171173876, "grad_norm": 1.6719781160354614, "learning_rate": 4.037911254743508e-06, "loss": 0.7762, "step": 16273 }, { "epoch": 0.5748270335424679, "grad_norm": 1.6871222257614136, "learning_rate": 4.037349940250844e-06, "loss": 0.798, "step": 16274 }, { "epoch": 0.5748623553461758, "grad_norm": 1.6091177463531494, "learning_rate": 4.036788638357162e-06, "loss": 0.7785, "step": 16275 }, { "epoch": 0.5748976771498837, "grad_norm": 1.6161677837371826, "learning_rate": 4.03622734906981e-06, "loss": 0.8032, "step": 16276 }, { "epoch": 0.5749329989535915, "grad_norm": 1.680486798286438, "learning_rate": 4.035666072396134e-06, "loss": 0.7777, "step": 16277 }, { "epoch": 0.5749683207572994, "grad_norm": 1.793494701385498, "learning_rate": 4.035104808343479e-06, "loss": 0.7859, "step": 16278 }, { "epoch": 0.5750036425610073, "grad_norm": 0.9810686707496643, "learning_rate": 4.03454355691919e-06, "loss": 0.5708, "step": 16279 }, { "epoch": 0.5750389643647152, "grad_norm": 1.6851412057876587, "learning_rate": 4.033982318130615e-06, "loss": 0.801, "step": 16280 }, { "epoch": 0.5750742861684232, "grad_norm": 1.6261441707611084, "learning_rate": 4.033421091985097e-06, "loss": 0.7685, "step": 16281 }, { "epoch": 0.5751096079721311, "grad_norm": 1.5731860399246216, "learning_rate": 4.032859878489982e-06, "loss": 0.7691, "step": 16282 }, { "epoch": 0.575144929775839, "grad_norm": 1.6885199546813965, "learning_rate": 4.032298677652615e-06, "loss": 0.7824, "step": 16283 }, { "epoch": 0.5751802515795469, "grad_norm": 1.9510436058044434, "learning_rate": 4.031737489480341e-06, "loss": 0.7454, "step": 16284 }, { "epoch": 0.5752155733832548, "grad_norm": 1.654056429862976, "learning_rate": 4.031176313980504e-06, "loss": 0.8161, "step": 16285 }, { "epoch": 0.5752508951869627, "grad_norm": 1.5653139352798462, "learning_rate": 4.0306151511604505e-06, "loss": 0.769, "step": 16286 }, { "epoch": 0.5752862169906706, "grad_norm": 1.5407975912094116, "learning_rate": 4.0300540010275236e-06, "loss": 0.7592, "step": 16287 }, { "epoch": 0.5753215387943785, "grad_norm": 1.952266812324524, "learning_rate": 4.029492863589066e-06, "loss": 0.7813, "step": 16288 }, { "epoch": 0.5753568605980864, "grad_norm": 1.501976728439331, "learning_rate": 4.028931738852425e-06, "loss": 0.7561, "step": 16289 }, { "epoch": 0.5753921824017943, "grad_norm": 1.7509241104125977, "learning_rate": 4.028370626824941e-06, "loss": 0.7993, "step": 16290 }, { "epoch": 0.5754275042055023, "grad_norm": 2.4859423637390137, "learning_rate": 4.027809527513962e-06, "loss": 0.7959, "step": 16291 }, { "epoch": 0.5754628260092102, "grad_norm": 1.5118087530136108, "learning_rate": 4.027248440926828e-06, "loss": 0.7356, "step": 16292 }, { "epoch": 0.5754981478129181, "grad_norm": 1.6072932481765747, "learning_rate": 4.026687367070883e-06, "loss": 0.8126, "step": 16293 }, { "epoch": 0.575533469616626, "grad_norm": 1.5404889583587646, "learning_rate": 4.0261263059534714e-06, "loss": 0.7713, "step": 16294 }, { "epoch": 0.5755687914203339, "grad_norm": 1.6008368730545044, "learning_rate": 4.025565257581936e-06, "loss": 0.8075, "step": 16295 }, { "epoch": 0.5756041132240418, "grad_norm": 1.6307507753372192, "learning_rate": 4.025004221963619e-06, "loss": 0.782, "step": 16296 }, { "epoch": 0.5756394350277497, "grad_norm": 1.8197909593582153, "learning_rate": 4.024443199105863e-06, "loss": 0.8023, "step": 16297 }, { "epoch": 0.5756747568314576, "grad_norm": 1.5371748208999634, "learning_rate": 4.0238821890160125e-06, "loss": 0.7965, "step": 16298 }, { "epoch": 0.5757100786351655, "grad_norm": 1.5324277877807617, "learning_rate": 4.023321191701408e-06, "loss": 0.7458, "step": 16299 }, { "epoch": 0.5757454004388735, "grad_norm": 1.6470906734466553, "learning_rate": 4.022760207169391e-06, "loss": 0.7855, "step": 16300 }, { "epoch": 0.5757807222425814, "grad_norm": 1.589605689048767, "learning_rate": 4.022199235427306e-06, "loss": 0.7915, "step": 16301 }, { "epoch": 0.5758160440462893, "grad_norm": 1.766135573387146, "learning_rate": 4.021638276482493e-06, "loss": 0.7646, "step": 16302 }, { "epoch": 0.5758513658499971, "grad_norm": 1.6737855672836304, "learning_rate": 4.021077330342294e-06, "loss": 0.7872, "step": 16303 }, { "epoch": 0.575886687653705, "grad_norm": 1.5734816789627075, "learning_rate": 4.0205163970140505e-06, "loss": 0.7934, "step": 16304 }, { "epoch": 0.5759220094574129, "grad_norm": 1.6307071447372437, "learning_rate": 4.0199554765051056e-06, "loss": 0.7894, "step": 16305 }, { "epoch": 0.5759573312611208, "grad_norm": 0.948851466178894, "learning_rate": 4.0193945688227974e-06, "loss": 0.5786, "step": 16306 }, { "epoch": 0.5759926530648287, "grad_norm": 1.8319294452667236, "learning_rate": 4.018833673974471e-06, "loss": 0.764, "step": 16307 }, { "epoch": 0.5760279748685366, "grad_norm": 1.7163714170455933, "learning_rate": 4.0182727919674635e-06, "loss": 0.7927, "step": 16308 }, { "epoch": 0.5760632966722445, "grad_norm": 1.6515085697174072, "learning_rate": 4.017711922809117e-06, "loss": 0.7969, "step": 16309 }, { "epoch": 0.5760986184759525, "grad_norm": 1.6407793760299683, "learning_rate": 4.017151066506773e-06, "loss": 0.7982, "step": 16310 }, { "epoch": 0.5761339402796604, "grad_norm": 1.566551923751831, "learning_rate": 4.016590223067771e-06, "loss": 0.821, "step": 16311 }, { "epoch": 0.5761692620833683, "grad_norm": 1.8219759464263916, "learning_rate": 4.01602939249945e-06, "loss": 0.8168, "step": 16312 }, { "epoch": 0.5762045838870762, "grad_norm": 1.0153685808181763, "learning_rate": 4.0154685748091524e-06, "loss": 0.5784, "step": 16313 }, { "epoch": 0.5762399056907841, "grad_norm": 1.748099446296692, "learning_rate": 4.014907770004217e-06, "loss": 0.776, "step": 16314 }, { "epoch": 0.576275227494492, "grad_norm": 1.8534709215164185, "learning_rate": 4.014346978091983e-06, "loss": 0.7628, "step": 16315 }, { "epoch": 0.5763105492981999, "grad_norm": 1.6098066568374634, "learning_rate": 4.013786199079791e-06, "loss": 0.7742, "step": 16316 }, { "epoch": 0.5763458711019078, "grad_norm": 1.5761314630508423, "learning_rate": 4.013225432974982e-06, "loss": 0.8051, "step": 16317 }, { "epoch": 0.5763811929056157, "grad_norm": 1.9876279830932617, "learning_rate": 4.012664679784891e-06, "loss": 0.7811, "step": 16318 }, { "epoch": 0.5764165147093236, "grad_norm": 1.7849358320236206, "learning_rate": 4.012103939516858e-06, "loss": 0.8109, "step": 16319 }, { "epoch": 0.5764518365130316, "grad_norm": 1.6227519512176514, "learning_rate": 4.011543212178225e-06, "loss": 0.7999, "step": 16320 }, { "epoch": 0.5764871583167395, "grad_norm": 1.7924119234085083, "learning_rate": 4.0109824977763274e-06, "loss": 0.7762, "step": 16321 }, { "epoch": 0.5765224801204474, "grad_norm": 1.697851300239563, "learning_rate": 4.010421796318504e-06, "loss": 0.7705, "step": 16322 }, { "epoch": 0.5765578019241553, "grad_norm": 1.5248112678527832, "learning_rate": 4.009861107812096e-06, "loss": 0.7731, "step": 16323 }, { "epoch": 0.5765931237278632, "grad_norm": 1.6452996730804443, "learning_rate": 4.009300432264438e-06, "loss": 0.7999, "step": 16324 }, { "epoch": 0.5766284455315711, "grad_norm": 1.6956923007965088, "learning_rate": 4.008739769682871e-06, "loss": 0.7562, "step": 16325 }, { "epoch": 0.576663767335279, "grad_norm": 1.646048903465271, "learning_rate": 4.008179120074732e-06, "loss": 0.7672, "step": 16326 }, { "epoch": 0.5766990891389869, "grad_norm": 1.5139375925064087, "learning_rate": 4.007618483447356e-06, "loss": 0.753, "step": 16327 }, { "epoch": 0.5767344109426948, "grad_norm": 1.5424607992172241, "learning_rate": 4.0070578598080845e-06, "loss": 0.7498, "step": 16328 }, { "epoch": 0.5767697327464026, "grad_norm": 1.8252394199371338, "learning_rate": 4.0064972491642526e-06, "loss": 0.804, "step": 16329 }, { "epoch": 0.5768050545501106, "grad_norm": 1.8652729988098145, "learning_rate": 4.005936651523198e-06, "loss": 0.7635, "step": 16330 }, { "epoch": 0.5768403763538185, "grad_norm": 1.6273363828659058, "learning_rate": 4.005376066892258e-06, "loss": 0.8044, "step": 16331 }, { "epoch": 0.5768756981575264, "grad_norm": 1.6750351190567017, "learning_rate": 4.004815495278769e-06, "loss": 0.7776, "step": 16332 }, { "epoch": 0.5769110199612343, "grad_norm": 2.082193613052368, "learning_rate": 4.004254936690067e-06, "loss": 0.7561, "step": 16333 }, { "epoch": 0.5769463417649422, "grad_norm": 1.794171929359436, "learning_rate": 4.00369439113349e-06, "loss": 0.7983, "step": 16334 }, { "epoch": 0.5769816635686501, "grad_norm": 1.8850879669189453, "learning_rate": 4.0031338586163745e-06, "loss": 0.8292, "step": 16335 }, { "epoch": 0.577016985372358, "grad_norm": 1.6206501722335815, "learning_rate": 4.002573339146055e-06, "loss": 0.7781, "step": 16336 }, { "epoch": 0.5770523071760659, "grad_norm": 1.680106282234192, "learning_rate": 4.002012832729866e-06, "loss": 0.7987, "step": 16337 }, { "epoch": 0.5770876289797738, "grad_norm": 1.5995265245437622, "learning_rate": 4.001452339375147e-06, "loss": 0.7662, "step": 16338 }, { "epoch": 0.5771229507834817, "grad_norm": 1.7717739343643188, "learning_rate": 4.0008918590892335e-06, "loss": 0.7748, "step": 16339 }, { "epoch": 0.5771582725871897, "grad_norm": 1.8301206827163696, "learning_rate": 4.000331391879457e-06, "loss": 0.7837, "step": 16340 }, { "epoch": 0.5771935943908976, "grad_norm": 1.6363613605499268, "learning_rate": 3.9997709377531564e-06, "loss": 0.7736, "step": 16341 }, { "epoch": 0.5772289161946055, "grad_norm": 1.7553889751434326, "learning_rate": 3.999210496717666e-06, "loss": 0.7911, "step": 16342 }, { "epoch": 0.5772642379983134, "grad_norm": 1.7786831855773926, "learning_rate": 3.99865006878032e-06, "loss": 0.782, "step": 16343 }, { "epoch": 0.5772995598020213, "grad_norm": 1.5464311838150024, "learning_rate": 3.9980896539484545e-06, "loss": 0.7736, "step": 16344 }, { "epoch": 0.5773348816057292, "grad_norm": 1.7366451025009155, "learning_rate": 3.997529252229403e-06, "loss": 0.8178, "step": 16345 }, { "epoch": 0.5773702034094371, "grad_norm": 1.745368242263794, "learning_rate": 3.9969688636304985e-06, "loss": 0.7717, "step": 16346 }, { "epoch": 0.577405525213145, "grad_norm": 2.1070170402526855, "learning_rate": 3.996408488159079e-06, "loss": 0.8013, "step": 16347 }, { "epoch": 0.5774408470168529, "grad_norm": 1.8525385856628418, "learning_rate": 3.995848125822475e-06, "loss": 0.768, "step": 16348 }, { "epoch": 0.5774761688205609, "grad_norm": 1.660424828529358, "learning_rate": 3.9952877766280235e-06, "loss": 0.7687, "step": 16349 }, { "epoch": 0.5775114906242688, "grad_norm": 1.7974070310592651, "learning_rate": 3.9947274405830565e-06, "loss": 0.7636, "step": 16350 }, { "epoch": 0.5775468124279767, "grad_norm": 1.6437394618988037, "learning_rate": 3.9941671176949065e-06, "loss": 0.8022, "step": 16351 }, { "epoch": 0.5775821342316846, "grad_norm": 1.622959852218628, "learning_rate": 3.99360680797091e-06, "loss": 0.7918, "step": 16352 }, { "epoch": 0.5776174560353925, "grad_norm": 1.5652861595153809, "learning_rate": 3.9930465114183966e-06, "loss": 0.7956, "step": 16353 }, { "epoch": 0.5776527778391004, "grad_norm": 1.5457305908203125, "learning_rate": 3.992486228044704e-06, "loss": 0.7944, "step": 16354 }, { "epoch": 0.5776880996428082, "grad_norm": 1.6702595949172974, "learning_rate": 3.991925957857161e-06, "loss": 0.7682, "step": 16355 }, { "epoch": 0.5777234214465161, "grad_norm": 1.7264152765274048, "learning_rate": 3.9913657008631e-06, "loss": 0.8045, "step": 16356 }, { "epoch": 0.577758743250224, "grad_norm": 1.6409471035003662, "learning_rate": 3.990805457069856e-06, "loss": 0.7792, "step": 16357 }, { "epoch": 0.5777940650539319, "grad_norm": 1.6063339710235596, "learning_rate": 3.990245226484759e-06, "loss": 0.7689, "step": 16358 }, { "epoch": 0.5778293868576398, "grad_norm": 1.622552514076233, "learning_rate": 3.989685009115145e-06, "loss": 0.8172, "step": 16359 }, { "epoch": 0.5778647086613478, "grad_norm": 1.7709779739379883, "learning_rate": 3.9891248049683436e-06, "loss": 0.7745, "step": 16360 }, { "epoch": 0.5779000304650557, "grad_norm": 1.5262309312820435, "learning_rate": 3.988564614051685e-06, "loss": 0.8205, "step": 16361 }, { "epoch": 0.5779353522687636, "grad_norm": 1.6728641986846924, "learning_rate": 3.988004436372503e-06, "loss": 0.8125, "step": 16362 }, { "epoch": 0.5779706740724715, "grad_norm": 1.7726484537124634, "learning_rate": 3.98744427193813e-06, "loss": 0.7625, "step": 16363 }, { "epoch": 0.5780059958761794, "grad_norm": 1.9678488969802856, "learning_rate": 3.986884120755895e-06, "loss": 0.8128, "step": 16364 }, { "epoch": 0.5780413176798873, "grad_norm": 1.6903550624847412, "learning_rate": 3.986323982833131e-06, "loss": 0.767, "step": 16365 }, { "epoch": 0.5780766394835952, "grad_norm": 1.706202745437622, "learning_rate": 3.985763858177167e-06, "loss": 0.8023, "step": 16366 }, { "epoch": 0.5781119612873031, "grad_norm": 1.577995777130127, "learning_rate": 3.985203746795335e-06, "loss": 0.7656, "step": 16367 }, { "epoch": 0.578147283091011, "grad_norm": 1.7987310886383057, "learning_rate": 3.984643648694967e-06, "loss": 0.8123, "step": 16368 }, { "epoch": 0.578182604894719, "grad_norm": 1.6945953369140625, "learning_rate": 3.984083563883391e-06, "loss": 0.7794, "step": 16369 }, { "epoch": 0.5782179266984269, "grad_norm": 1.5661208629608154, "learning_rate": 3.9835234923679386e-06, "loss": 0.7855, "step": 16370 }, { "epoch": 0.5782532485021348, "grad_norm": 1.61007821559906, "learning_rate": 3.9829634341559396e-06, "loss": 0.7992, "step": 16371 }, { "epoch": 0.5782885703058427, "grad_norm": 2.2818808555603027, "learning_rate": 3.982403389254725e-06, "loss": 0.8, "step": 16372 }, { "epoch": 0.5783238921095506, "grad_norm": 1.7757766246795654, "learning_rate": 3.981843357671623e-06, "loss": 0.7697, "step": 16373 }, { "epoch": 0.5783592139132585, "grad_norm": 1.800736904144287, "learning_rate": 3.981283339413963e-06, "loss": 0.7944, "step": 16374 }, { "epoch": 0.5783945357169664, "grad_norm": 1.7232460975646973, "learning_rate": 3.980723334489075e-06, "loss": 0.8021, "step": 16375 }, { "epoch": 0.5784298575206743, "grad_norm": 1.5792516469955444, "learning_rate": 3.98016334290429e-06, "loss": 0.7696, "step": 16376 }, { "epoch": 0.5784651793243822, "grad_norm": 1.6183096170425415, "learning_rate": 3.979603364666933e-06, "loss": 0.7935, "step": 16377 }, { "epoch": 0.5785005011280901, "grad_norm": 1.5607318878173828, "learning_rate": 3.979043399784337e-06, "loss": 0.7624, "step": 16378 }, { "epoch": 0.5785358229317981, "grad_norm": 1.5954023599624634, "learning_rate": 3.97848344826383e-06, "loss": 0.7794, "step": 16379 }, { "epoch": 0.578571144735506, "grad_norm": 1.7099716663360596, "learning_rate": 3.977923510112737e-06, "loss": 0.7632, "step": 16380 }, { "epoch": 0.5786064665392138, "grad_norm": 1.6473008394241333, "learning_rate": 3.977363585338391e-06, "loss": 0.799, "step": 16381 }, { "epoch": 0.5786417883429217, "grad_norm": 1.87352454662323, "learning_rate": 3.976803673948116e-06, "loss": 0.7872, "step": 16382 }, { "epoch": 0.5786771101466296, "grad_norm": 1.7008455991744995, "learning_rate": 3.976243775949245e-06, "loss": 0.7743, "step": 16383 }, { "epoch": 0.5787124319503375, "grad_norm": 1.830657720565796, "learning_rate": 3.975683891349101e-06, "loss": 0.8246, "step": 16384 }, { "epoch": 0.5787477537540454, "grad_norm": 2.0892703533172607, "learning_rate": 3.975124020155014e-06, "loss": 0.7467, "step": 16385 }, { "epoch": 0.5787830755577533, "grad_norm": 1.7758692502975464, "learning_rate": 3.974564162374312e-06, "loss": 0.7699, "step": 16386 }, { "epoch": 0.5788183973614612, "grad_norm": 1.661578893661499, "learning_rate": 3.974004318014321e-06, "loss": 0.7806, "step": 16387 }, { "epoch": 0.5788537191651691, "grad_norm": 1.672005295753479, "learning_rate": 3.973444487082369e-06, "loss": 0.7836, "step": 16388 }, { "epoch": 0.5788890409688771, "grad_norm": 1.7002785205841064, "learning_rate": 3.972884669585782e-06, "loss": 0.7962, "step": 16389 }, { "epoch": 0.578924362772585, "grad_norm": 1.6385737657546997, "learning_rate": 3.9723248655318875e-06, "loss": 0.8195, "step": 16390 }, { "epoch": 0.5789596845762929, "grad_norm": 1.721251130104065, "learning_rate": 3.9717650749280135e-06, "loss": 0.8092, "step": 16391 }, { "epoch": 0.5789950063800008, "grad_norm": 1.575564980506897, "learning_rate": 3.971205297781483e-06, "loss": 0.7688, "step": 16392 }, { "epoch": 0.5790303281837087, "grad_norm": 1.7428796291351318, "learning_rate": 3.9706455340996246e-06, "loss": 0.7902, "step": 16393 }, { "epoch": 0.5790656499874166, "grad_norm": 2.6655330657958984, "learning_rate": 3.970085783889764e-06, "loss": 0.7633, "step": 16394 }, { "epoch": 0.5791009717911245, "grad_norm": 1.8678741455078125, "learning_rate": 3.969526047159227e-06, "loss": 0.7892, "step": 16395 }, { "epoch": 0.5791362935948324, "grad_norm": 1.5422756671905518, "learning_rate": 3.96896632391534e-06, "loss": 0.7724, "step": 16396 }, { "epoch": 0.5791716153985403, "grad_norm": 2.1250555515289307, "learning_rate": 3.968406614165427e-06, "loss": 0.7277, "step": 16397 }, { "epoch": 0.5792069372022483, "grad_norm": 1.963323712348938, "learning_rate": 3.967846917916814e-06, "loss": 0.7869, "step": 16398 }, { "epoch": 0.5792422590059562, "grad_norm": 1.8135994672775269, "learning_rate": 3.967287235176828e-06, "loss": 0.8222, "step": 16399 }, { "epoch": 0.5792775808096641, "grad_norm": 1.6587246656417847, "learning_rate": 3.966727565952791e-06, "loss": 0.8359, "step": 16400 }, { "epoch": 0.579312902613372, "grad_norm": 1.5672435760498047, "learning_rate": 3.96616791025203e-06, "loss": 0.7739, "step": 16401 }, { "epoch": 0.5793482244170799, "grad_norm": 1.652982234954834, "learning_rate": 3.96560826808187e-06, "loss": 0.8043, "step": 16402 }, { "epoch": 0.5793835462207878, "grad_norm": 1.8092247247695923, "learning_rate": 3.965048639449633e-06, "loss": 0.8327, "step": 16403 }, { "epoch": 0.5794188680244957, "grad_norm": 1.8878005743026733, "learning_rate": 3.964489024362645e-06, "loss": 0.7347, "step": 16404 }, { "epoch": 0.5794541898282036, "grad_norm": 1.770218849182129, "learning_rate": 3.963929422828232e-06, "loss": 0.7763, "step": 16405 }, { "epoch": 0.5794895116319115, "grad_norm": 1.8696784973144531, "learning_rate": 3.963369834853713e-06, "loss": 0.7875, "step": 16406 }, { "epoch": 0.5795248334356193, "grad_norm": 1.648929238319397, "learning_rate": 3.962810260446417e-06, "loss": 0.7893, "step": 16407 }, { "epoch": 0.5795601552393272, "grad_norm": 1.7496843338012695, "learning_rate": 3.9622506996136645e-06, "loss": 0.7609, "step": 16408 }, { "epoch": 0.5795954770430352, "grad_norm": 1.6153044700622559, "learning_rate": 3.9616911523627815e-06, "loss": 0.8076, "step": 16409 }, { "epoch": 0.5796307988467431, "grad_norm": 1.7499244213104248, "learning_rate": 3.9611316187010885e-06, "loss": 0.783, "step": 16410 }, { "epoch": 0.579666120650451, "grad_norm": 1.5938661098480225, "learning_rate": 3.960572098635908e-06, "loss": 0.7927, "step": 16411 }, { "epoch": 0.5797014424541589, "grad_norm": 1.7523976564407349, "learning_rate": 3.960012592174566e-06, "loss": 0.8225, "step": 16412 }, { "epoch": 0.5797367642578668, "grad_norm": 1.780177354812622, "learning_rate": 3.959453099324383e-06, "loss": 0.7645, "step": 16413 }, { "epoch": 0.5797720860615747, "grad_norm": 1.7133290767669678, "learning_rate": 3.958893620092682e-06, "loss": 0.7638, "step": 16414 }, { "epoch": 0.5798074078652826, "grad_norm": 1.495703935623169, "learning_rate": 3.958334154486786e-06, "loss": 0.7583, "step": 16415 }, { "epoch": 0.5798427296689905, "grad_norm": 1.873002290725708, "learning_rate": 3.957774702514016e-06, "loss": 0.7932, "step": 16416 }, { "epoch": 0.5798780514726984, "grad_norm": 1.6909488439559937, "learning_rate": 3.957215264181696e-06, "loss": 0.7586, "step": 16417 }, { "epoch": 0.5799133732764064, "grad_norm": 1.762476921081543, "learning_rate": 3.956655839497147e-06, "loss": 0.8288, "step": 16418 }, { "epoch": 0.5799486950801143, "grad_norm": 1.6994529962539673, "learning_rate": 3.956096428467688e-06, "loss": 0.7777, "step": 16419 }, { "epoch": 0.5799840168838222, "grad_norm": 0.9358137249946594, "learning_rate": 3.955537031100644e-06, "loss": 0.5895, "step": 16420 }, { "epoch": 0.5800193386875301, "grad_norm": 1.6507242918014526, "learning_rate": 3.954977647403336e-06, "loss": 0.7781, "step": 16421 }, { "epoch": 0.580054660491238, "grad_norm": 1.7553638219833374, "learning_rate": 3.954418277383082e-06, "loss": 0.8079, "step": 16422 }, { "epoch": 0.5800899822949459, "grad_norm": 1.6578550338745117, "learning_rate": 3.953858921047207e-06, "loss": 0.8089, "step": 16423 }, { "epoch": 0.5801253040986538, "grad_norm": 2.006340265274048, "learning_rate": 3.953299578403029e-06, "loss": 0.7933, "step": 16424 }, { "epoch": 0.5801606259023617, "grad_norm": 4.331729888916016, "learning_rate": 3.952740249457869e-06, "loss": 0.7602, "step": 16425 }, { "epoch": 0.5801959477060696, "grad_norm": 1.5608882904052734, "learning_rate": 3.952180934219048e-06, "loss": 0.7674, "step": 16426 }, { "epoch": 0.5802312695097775, "grad_norm": 2.471107244491577, "learning_rate": 3.951621632693887e-06, "loss": 0.8292, "step": 16427 }, { "epoch": 0.5802665913134855, "grad_norm": 0.9343419075012207, "learning_rate": 3.951062344889705e-06, "loss": 0.6072, "step": 16428 }, { "epoch": 0.5803019131171934, "grad_norm": 1.6808698177337646, "learning_rate": 3.95050307081382e-06, "loss": 0.7743, "step": 16429 }, { "epoch": 0.5803372349209013, "grad_norm": 1.6224117279052734, "learning_rate": 3.949943810473555e-06, "loss": 0.7903, "step": 16430 }, { "epoch": 0.5803725567246092, "grad_norm": 2.061366081237793, "learning_rate": 3.949384563876228e-06, "loss": 0.8577, "step": 16431 }, { "epoch": 0.5804078785283171, "grad_norm": 1.666774034500122, "learning_rate": 3.948825331029157e-06, "loss": 0.7743, "step": 16432 }, { "epoch": 0.5804432003320249, "grad_norm": 1.6555887460708618, "learning_rate": 3.948266111939664e-06, "loss": 0.7819, "step": 16433 }, { "epoch": 0.5804785221357328, "grad_norm": 2.1516976356506348, "learning_rate": 3.947706906615066e-06, "loss": 0.765, "step": 16434 }, { "epoch": 0.5805138439394407, "grad_norm": 1.689307689666748, "learning_rate": 3.947147715062681e-06, "loss": 0.7948, "step": 16435 }, { "epoch": 0.5805491657431486, "grad_norm": 1.8910619020462036, "learning_rate": 3.94658853728983e-06, "loss": 0.8269, "step": 16436 }, { "epoch": 0.5805844875468565, "grad_norm": 2.0071732997894287, "learning_rate": 3.9460293733038304e-06, "loss": 0.8057, "step": 16437 }, { "epoch": 0.5806198093505645, "grad_norm": 1.5797663927078247, "learning_rate": 3.945470223111999e-06, "loss": 0.7658, "step": 16438 }, { "epoch": 0.5806551311542724, "grad_norm": 2.7928221225738525, "learning_rate": 3.944911086721656e-06, "loss": 0.7677, "step": 16439 }, { "epoch": 0.5806904529579803, "grad_norm": 1.7028864622116089, "learning_rate": 3.9443519641401176e-06, "loss": 0.7654, "step": 16440 }, { "epoch": 0.5807257747616882, "grad_norm": 1.9727132320404053, "learning_rate": 3.943792855374703e-06, "loss": 0.782, "step": 16441 }, { "epoch": 0.5807610965653961, "grad_norm": 1.9014244079589844, "learning_rate": 3.943233760432729e-06, "loss": 0.8141, "step": 16442 }, { "epoch": 0.580796418369104, "grad_norm": 1.8008382320404053, "learning_rate": 3.942674679321512e-06, "loss": 0.8164, "step": 16443 }, { "epoch": 0.5808317401728119, "grad_norm": 1.9352827072143555, "learning_rate": 3.942115612048371e-06, "loss": 0.7785, "step": 16444 }, { "epoch": 0.5808670619765198, "grad_norm": 1.6416599750518799, "learning_rate": 3.9415565586206226e-06, "loss": 0.7687, "step": 16445 }, { "epoch": 0.5809023837802277, "grad_norm": 2.302334785461426, "learning_rate": 3.940997519045583e-06, "loss": 0.816, "step": 16446 }, { "epoch": 0.5809377055839356, "grad_norm": 2.008528709411621, "learning_rate": 3.940438493330568e-06, "loss": 0.7862, "step": 16447 }, { "epoch": 0.5809730273876436, "grad_norm": 1.755016565322876, "learning_rate": 3.939879481482894e-06, "loss": 0.8263, "step": 16448 }, { "epoch": 0.5810083491913515, "grad_norm": 1.6485579013824463, "learning_rate": 3.9393204835098795e-06, "loss": 0.7777, "step": 16449 }, { "epoch": 0.5810436709950594, "grad_norm": 1.9251987934112549, "learning_rate": 3.938761499418838e-06, "loss": 0.7872, "step": 16450 }, { "epoch": 0.5810789927987673, "grad_norm": 2.035980701446533, "learning_rate": 3.938202529217087e-06, "loss": 0.7718, "step": 16451 }, { "epoch": 0.5811143146024752, "grad_norm": 1.6603096723556519, "learning_rate": 3.937643572911942e-06, "loss": 0.7581, "step": 16452 }, { "epoch": 0.5811496364061831, "grad_norm": 1.5902217626571655, "learning_rate": 3.937084630510717e-06, "loss": 0.7723, "step": 16453 }, { "epoch": 0.581184958209891, "grad_norm": 1.678226351737976, "learning_rate": 3.93652570202073e-06, "loss": 0.728, "step": 16454 }, { "epoch": 0.5812202800135989, "grad_norm": 0.9055976867675781, "learning_rate": 3.935966787449294e-06, "loss": 0.5922, "step": 16455 }, { "epoch": 0.5812556018173068, "grad_norm": 1.7654304504394531, "learning_rate": 3.935407886803725e-06, "loss": 0.7933, "step": 16456 }, { "epoch": 0.5812909236210148, "grad_norm": 1.6065911054611206, "learning_rate": 3.934849000091337e-06, "loss": 0.7598, "step": 16457 }, { "epoch": 0.5813262454247227, "grad_norm": 1.607569932937622, "learning_rate": 3.934290127319446e-06, "loss": 0.7707, "step": 16458 }, { "epoch": 0.5813615672284305, "grad_norm": 1.6774303913116455, "learning_rate": 3.933731268495364e-06, "loss": 0.7607, "step": 16459 }, { "epoch": 0.5813968890321384, "grad_norm": 1.7357412576675415, "learning_rate": 3.933172423626408e-06, "loss": 0.7924, "step": 16460 }, { "epoch": 0.5814322108358463, "grad_norm": 1.6065720319747925, "learning_rate": 3.93261359271989e-06, "loss": 0.7953, "step": 16461 }, { "epoch": 0.5814675326395542, "grad_norm": 1.5694323778152466, "learning_rate": 3.932054775783124e-06, "loss": 0.7699, "step": 16462 }, { "epoch": 0.5815028544432621, "grad_norm": 1.5687257051467896, "learning_rate": 3.931495972823427e-06, "loss": 0.771, "step": 16463 }, { "epoch": 0.58153817624697, "grad_norm": 1.586220622062683, "learning_rate": 3.930937183848107e-06, "loss": 0.7891, "step": 16464 }, { "epoch": 0.5815734980506779, "grad_norm": 1.6645455360412598, "learning_rate": 3.930378408864484e-06, "loss": 0.8044, "step": 16465 }, { "epoch": 0.5816088198543858, "grad_norm": 2.1241204738616943, "learning_rate": 3.929819647879863e-06, "loss": 0.7652, "step": 16466 }, { "epoch": 0.5816441416580937, "grad_norm": 1.7875134944915771, "learning_rate": 3.929260900901564e-06, "loss": 0.7946, "step": 16467 }, { "epoch": 0.5816794634618017, "grad_norm": 1.6956253051757812, "learning_rate": 3.928702167936896e-06, "loss": 0.7435, "step": 16468 }, { "epoch": 0.5817147852655096, "grad_norm": 2.3307080268859863, "learning_rate": 3.928143448993172e-06, "loss": 0.7979, "step": 16469 }, { "epoch": 0.5817501070692175, "grad_norm": 1.8274128437042236, "learning_rate": 3.927584744077706e-06, "loss": 0.7854, "step": 16470 }, { "epoch": 0.5817854288729254, "grad_norm": 1.517998456954956, "learning_rate": 3.927026053197809e-06, "loss": 0.7407, "step": 16471 }, { "epoch": 0.5818207506766333, "grad_norm": 1.6728565692901611, "learning_rate": 3.926467376360793e-06, "loss": 0.8207, "step": 16472 }, { "epoch": 0.5818560724803412, "grad_norm": 1.6047844886779785, "learning_rate": 3.92590871357397e-06, "loss": 0.768, "step": 16473 }, { "epoch": 0.5818913942840491, "grad_norm": 1.801099181175232, "learning_rate": 3.925350064844652e-06, "loss": 0.7758, "step": 16474 }, { "epoch": 0.581926716087757, "grad_norm": 1.664310097694397, "learning_rate": 3.924791430180151e-06, "loss": 0.7839, "step": 16475 }, { "epoch": 0.581962037891465, "grad_norm": 1.6652908325195312, "learning_rate": 3.924232809587778e-06, "loss": 0.7813, "step": 16476 }, { "epoch": 0.5819973596951729, "grad_norm": 1.6677892208099365, "learning_rate": 3.923674203074842e-06, "loss": 0.7812, "step": 16477 }, { "epoch": 0.5820326814988808, "grad_norm": 1.6216858625411987, "learning_rate": 3.923115610648656e-06, "loss": 0.789, "step": 16478 }, { "epoch": 0.5820680033025887, "grad_norm": 1.6304353475570679, "learning_rate": 3.922557032316532e-06, "loss": 0.8322, "step": 16479 }, { "epoch": 0.5821033251062966, "grad_norm": 1.640487551689148, "learning_rate": 3.921998468085778e-06, "loss": 0.7892, "step": 16480 }, { "epoch": 0.5821386469100045, "grad_norm": 1.8988673686981201, "learning_rate": 3.921439917963706e-06, "loss": 0.7902, "step": 16481 }, { "epoch": 0.5821739687137124, "grad_norm": 1.8158210515975952, "learning_rate": 3.920881381957625e-06, "loss": 0.7966, "step": 16482 }, { "epoch": 0.5822092905174203, "grad_norm": 2.197255849838257, "learning_rate": 3.920322860074848e-06, "loss": 0.7865, "step": 16483 }, { "epoch": 0.5822446123211282, "grad_norm": 1.7821885347366333, "learning_rate": 3.919764352322679e-06, "loss": 0.7655, "step": 16484 }, { "epoch": 0.582279934124836, "grad_norm": 1.69890296459198, "learning_rate": 3.919205858708433e-06, "loss": 0.7969, "step": 16485 }, { "epoch": 0.5823152559285439, "grad_norm": 1.6954824924468994, "learning_rate": 3.918647379239417e-06, "loss": 0.7897, "step": 16486 }, { "epoch": 0.5823505777322519, "grad_norm": 1.702760934829712, "learning_rate": 3.91808891392294e-06, "loss": 0.8304, "step": 16487 }, { "epoch": 0.5823858995359598, "grad_norm": 1.7801923751831055, "learning_rate": 3.917530462766313e-06, "loss": 0.7762, "step": 16488 }, { "epoch": 0.5824212213396677, "grad_norm": 1.533962607383728, "learning_rate": 3.916972025776844e-06, "loss": 0.7639, "step": 16489 }, { "epoch": 0.5824565431433756, "grad_norm": 1.6090824604034424, "learning_rate": 3.91641360296184e-06, "loss": 0.773, "step": 16490 }, { "epoch": 0.5824918649470835, "grad_norm": 1.7694815397262573, "learning_rate": 3.915855194328612e-06, "loss": 0.7638, "step": 16491 }, { "epoch": 0.5825271867507914, "grad_norm": 1.5974289178848267, "learning_rate": 3.915296799884469e-06, "loss": 0.7984, "step": 16492 }, { "epoch": 0.5825625085544993, "grad_norm": 1.5671920776367188, "learning_rate": 3.914738419636715e-06, "loss": 0.7893, "step": 16493 }, { "epoch": 0.5825978303582072, "grad_norm": 1.5702950954437256, "learning_rate": 3.914180053592662e-06, "loss": 0.7847, "step": 16494 }, { "epoch": 0.5826331521619151, "grad_norm": 1.5933454036712646, "learning_rate": 3.9136217017596166e-06, "loss": 0.7849, "step": 16495 }, { "epoch": 0.582668473965623, "grad_norm": 1.7515101432800293, "learning_rate": 3.9130633641448845e-06, "loss": 0.8038, "step": 16496 }, { "epoch": 0.582703795769331, "grad_norm": 1.632307767868042, "learning_rate": 3.912505040755776e-06, "loss": 0.7574, "step": 16497 }, { "epoch": 0.5827391175730389, "grad_norm": 1.5719839334487915, "learning_rate": 3.911946731599597e-06, "loss": 0.7872, "step": 16498 }, { "epoch": 0.5827744393767468, "grad_norm": 1.7458722591400146, "learning_rate": 3.911388436683655e-06, "loss": 0.8146, "step": 16499 }, { "epoch": 0.5828097611804547, "grad_norm": 2.0012407302856445, "learning_rate": 3.910830156015256e-06, "loss": 0.78, "step": 16500 }, { "epoch": 0.5828450829841626, "grad_norm": 1.5001689195632935, "learning_rate": 3.9102718896017064e-06, "loss": 0.7945, "step": 16501 }, { "epoch": 0.5828804047878705, "grad_norm": 1.6253398656845093, "learning_rate": 3.909713637450317e-06, "loss": 0.7857, "step": 16502 }, { "epoch": 0.5829157265915784, "grad_norm": 1.7490299940109253, "learning_rate": 3.909155399568387e-06, "loss": 0.7876, "step": 16503 }, { "epoch": 0.5829510483952863, "grad_norm": 1.6612452268600464, "learning_rate": 3.908597175963228e-06, "loss": 0.7808, "step": 16504 }, { "epoch": 0.5829863701989942, "grad_norm": 1.6296485662460327, "learning_rate": 3.908038966642143e-06, "loss": 0.7926, "step": 16505 }, { "epoch": 0.5830216920027022, "grad_norm": 1.9180601835250854, "learning_rate": 3.907480771612438e-06, "loss": 0.794, "step": 16506 }, { "epoch": 0.5830570138064101, "grad_norm": 1.7098896503448486, "learning_rate": 3.90692259088142e-06, "loss": 0.7944, "step": 16507 }, { "epoch": 0.583092335610118, "grad_norm": 1.799737811088562, "learning_rate": 3.906364424456393e-06, "loss": 0.8006, "step": 16508 }, { "epoch": 0.5831276574138259, "grad_norm": 1.8143736124038696, "learning_rate": 3.9058062723446635e-06, "loss": 0.8115, "step": 16509 }, { "epoch": 0.5831629792175338, "grad_norm": 1.6607933044433594, "learning_rate": 3.905248134553537e-06, "loss": 0.7881, "step": 16510 }, { "epoch": 0.5831983010212417, "grad_norm": 1.6835075616836548, "learning_rate": 3.904690011090314e-06, "loss": 0.7965, "step": 16511 }, { "epoch": 0.5832336228249495, "grad_norm": 1.6497180461883545, "learning_rate": 3.904131901962306e-06, "loss": 0.8015, "step": 16512 }, { "epoch": 0.5832689446286574, "grad_norm": 1.5445818901062012, "learning_rate": 3.903573807176811e-06, "loss": 0.7722, "step": 16513 }, { "epoch": 0.5833042664323653, "grad_norm": 1.5909810066223145, "learning_rate": 3.903015726741137e-06, "loss": 0.775, "step": 16514 }, { "epoch": 0.5833395882360732, "grad_norm": 1.8287122249603271, "learning_rate": 3.902457660662586e-06, "loss": 0.7916, "step": 16515 }, { "epoch": 0.5833749100397811, "grad_norm": 1.7495213747024536, "learning_rate": 3.9018996089484644e-06, "loss": 0.7826, "step": 16516 }, { "epoch": 0.5834102318434891, "grad_norm": 2.9043619632720947, "learning_rate": 3.901341571606073e-06, "loss": 0.8035, "step": 16517 }, { "epoch": 0.583445553647197, "grad_norm": 1.8707507848739624, "learning_rate": 3.900783548642717e-06, "loss": 0.8005, "step": 16518 }, { "epoch": 0.5834808754509049, "grad_norm": 1.6439430713653564, "learning_rate": 3.9002255400656995e-06, "loss": 0.7615, "step": 16519 }, { "epoch": 0.5835161972546128, "grad_norm": 2.1005425453186035, "learning_rate": 3.899667545882325e-06, "loss": 0.7882, "step": 16520 }, { "epoch": 0.5835515190583207, "grad_norm": 1.6126062870025635, "learning_rate": 3.899109566099892e-06, "loss": 0.7768, "step": 16521 }, { "epoch": 0.5835868408620286, "grad_norm": 1.6208422183990479, "learning_rate": 3.898551600725707e-06, "loss": 0.7797, "step": 16522 }, { "epoch": 0.5836221626657365, "grad_norm": 1.5681214332580566, "learning_rate": 3.897993649767072e-06, "loss": 0.7992, "step": 16523 }, { "epoch": 0.5836574844694444, "grad_norm": 1.6757490634918213, "learning_rate": 3.897435713231289e-06, "loss": 0.7909, "step": 16524 }, { "epoch": 0.5836928062731523, "grad_norm": 1.695145606994629, "learning_rate": 3.896877791125659e-06, "loss": 0.8016, "step": 16525 }, { "epoch": 0.5837281280768603, "grad_norm": 1.7779207229614258, "learning_rate": 3.896319883457486e-06, "loss": 0.792, "step": 16526 }, { "epoch": 0.5837634498805682, "grad_norm": 1.6245967149734497, "learning_rate": 3.89576199023407e-06, "loss": 0.7803, "step": 16527 }, { "epoch": 0.5837987716842761, "grad_norm": 0.9582402110099792, "learning_rate": 3.895204111462715e-06, "loss": 0.5903, "step": 16528 }, { "epoch": 0.583834093487984, "grad_norm": 1.6301254034042358, "learning_rate": 3.894646247150721e-06, "loss": 0.806, "step": 16529 }, { "epoch": 0.5838694152916919, "grad_norm": 1.7893558740615845, "learning_rate": 3.894088397305387e-06, "loss": 0.8104, "step": 16530 }, { "epoch": 0.5839047370953998, "grad_norm": 1.7069112062454224, "learning_rate": 3.893530561934018e-06, "loss": 0.7853, "step": 16531 }, { "epoch": 0.5839400588991077, "grad_norm": 1.689089298248291, "learning_rate": 3.892972741043912e-06, "loss": 0.7735, "step": 16532 }, { "epoch": 0.5839753807028156, "grad_norm": 1.6816591024398804, "learning_rate": 3.8924149346423715e-06, "loss": 0.788, "step": 16533 }, { "epoch": 0.5840107025065235, "grad_norm": 1.5408294200897217, "learning_rate": 3.891857142736696e-06, "loss": 0.7397, "step": 16534 }, { "epoch": 0.5840460243102314, "grad_norm": 1.479402780532837, "learning_rate": 3.891299365334185e-06, "loss": 0.7835, "step": 16535 }, { "epoch": 0.5840813461139394, "grad_norm": 1.7477179765701294, "learning_rate": 3.890741602442141e-06, "loss": 0.7802, "step": 16536 }, { "epoch": 0.5841166679176473, "grad_norm": 1.5863720178604126, "learning_rate": 3.8901838540678614e-06, "loss": 0.7596, "step": 16537 }, { "epoch": 0.5841519897213551, "grad_norm": 1.6201441287994385, "learning_rate": 3.889626120218646e-06, "loss": 0.8074, "step": 16538 }, { "epoch": 0.584187311525063, "grad_norm": 1.694315791130066, "learning_rate": 3.889068400901799e-06, "loss": 0.7924, "step": 16539 }, { "epoch": 0.5842226333287709, "grad_norm": 1.7686406373977661, "learning_rate": 3.888510696124612e-06, "loss": 0.7944, "step": 16540 }, { "epoch": 0.5842579551324788, "grad_norm": 0.9842403531074524, "learning_rate": 3.88795300589439e-06, "loss": 0.5959, "step": 16541 }, { "epoch": 0.5842932769361867, "grad_norm": 1.5332692861557007, "learning_rate": 3.887395330218429e-06, "loss": 0.7723, "step": 16542 }, { "epoch": 0.5843285987398946, "grad_norm": 1.56392502784729, "learning_rate": 3.886837669104029e-06, "loss": 0.8015, "step": 16543 }, { "epoch": 0.5843639205436025, "grad_norm": 1.6101791858673096, "learning_rate": 3.886280022558489e-06, "loss": 0.7685, "step": 16544 }, { "epoch": 0.5843992423473104, "grad_norm": 1.5913830995559692, "learning_rate": 3.885722390589105e-06, "loss": 0.8118, "step": 16545 }, { "epoch": 0.5844345641510184, "grad_norm": 1.6814754009246826, "learning_rate": 3.885164773203178e-06, "loss": 0.7879, "step": 16546 }, { "epoch": 0.5844698859547263, "grad_norm": 1.6248830556869507, "learning_rate": 3.884607170408006e-06, "loss": 0.7663, "step": 16547 }, { "epoch": 0.5845052077584342, "grad_norm": 1.8007668256759644, "learning_rate": 3.884049582210883e-06, "loss": 0.7836, "step": 16548 }, { "epoch": 0.5845405295621421, "grad_norm": 1.666858196258545, "learning_rate": 3.883492008619112e-06, "loss": 0.7927, "step": 16549 }, { "epoch": 0.58457585136585, "grad_norm": 1.8291397094726562, "learning_rate": 3.882934449639986e-06, "loss": 0.7853, "step": 16550 }, { "epoch": 0.5846111731695579, "grad_norm": 1.7080386877059937, "learning_rate": 3.882376905280805e-06, "loss": 0.8041, "step": 16551 }, { "epoch": 0.5846464949732658, "grad_norm": 1.6293174028396606, "learning_rate": 3.881819375548864e-06, "loss": 0.8117, "step": 16552 }, { "epoch": 0.5846818167769737, "grad_norm": 1.898776650428772, "learning_rate": 3.8812618604514615e-06, "loss": 0.7648, "step": 16553 }, { "epoch": 0.5847171385806816, "grad_norm": 1.5860899686813354, "learning_rate": 3.880704359995892e-06, "loss": 0.7441, "step": 16554 }, { "epoch": 0.5847524603843895, "grad_norm": 2.825798273086548, "learning_rate": 3.880146874189456e-06, "loss": 0.7996, "step": 16555 }, { "epoch": 0.5847877821880975, "grad_norm": 1.6936591863632202, "learning_rate": 3.879589403039445e-06, "loss": 0.7668, "step": 16556 }, { "epoch": 0.5848231039918054, "grad_norm": 1.712668538093567, "learning_rate": 3.879031946553161e-06, "loss": 0.8151, "step": 16557 }, { "epoch": 0.5848584257955133, "grad_norm": 1.587181568145752, "learning_rate": 3.878474504737893e-06, "loss": 0.7637, "step": 16558 }, { "epoch": 0.5848937475992212, "grad_norm": 1.5471699237823486, "learning_rate": 3.87791707760094e-06, "loss": 0.7472, "step": 16559 }, { "epoch": 0.5849290694029291, "grad_norm": 1.7706711292266846, "learning_rate": 3.877359665149598e-06, "loss": 0.7638, "step": 16560 }, { "epoch": 0.584964391206637, "grad_norm": 1.6545302867889404, "learning_rate": 3.87680226739116e-06, "loss": 0.7708, "step": 16561 }, { "epoch": 0.5849997130103449, "grad_norm": 1.6203967332839966, "learning_rate": 3.876244884332925e-06, "loss": 0.789, "step": 16562 }, { "epoch": 0.5850350348140528, "grad_norm": 1.6669501066207886, "learning_rate": 3.875687515982185e-06, "loss": 0.7694, "step": 16563 }, { "epoch": 0.5850703566177606, "grad_norm": 1.5843830108642578, "learning_rate": 3.875130162346234e-06, "loss": 0.8209, "step": 16564 }, { "epoch": 0.5851056784214685, "grad_norm": 1.7506067752838135, "learning_rate": 3.87457282343237e-06, "loss": 0.7906, "step": 16565 }, { "epoch": 0.5851410002251765, "grad_norm": 1.7520134449005127, "learning_rate": 3.8740154992478834e-06, "loss": 0.8195, "step": 16566 }, { "epoch": 0.5851763220288844, "grad_norm": 1.566972017288208, "learning_rate": 3.873458189800072e-06, "loss": 0.7859, "step": 16567 }, { "epoch": 0.5852116438325923, "grad_norm": 1.6101086139678955, "learning_rate": 3.872900895096227e-06, "loss": 0.7579, "step": 16568 }, { "epoch": 0.5852469656363002, "grad_norm": 1.500331163406372, "learning_rate": 3.872343615143643e-06, "loss": 0.7831, "step": 16569 }, { "epoch": 0.5852822874400081, "grad_norm": 1.6210973262786865, "learning_rate": 3.871786349949615e-06, "loss": 0.7551, "step": 16570 }, { "epoch": 0.585317609243716, "grad_norm": 1.6374627351760864, "learning_rate": 3.871229099521435e-06, "loss": 0.7901, "step": 16571 }, { "epoch": 0.5853529310474239, "grad_norm": 1.596299648284912, "learning_rate": 3.8706718638663954e-06, "loss": 0.7725, "step": 16572 }, { "epoch": 0.5853882528511318, "grad_norm": 1.748783826828003, "learning_rate": 3.870114642991791e-06, "loss": 0.7774, "step": 16573 }, { "epoch": 0.5854235746548397, "grad_norm": 1.778895616531372, "learning_rate": 3.869557436904914e-06, "loss": 0.7919, "step": 16574 }, { "epoch": 0.5854588964585477, "grad_norm": 1.8926520347595215, "learning_rate": 3.869000245613057e-06, "loss": 0.7885, "step": 16575 }, { "epoch": 0.5854942182622556, "grad_norm": 1.6782803535461426, "learning_rate": 3.8684430691235105e-06, "loss": 0.8049, "step": 16576 }, { "epoch": 0.5855295400659635, "grad_norm": 1.5092887878417969, "learning_rate": 3.8678859074435705e-06, "loss": 0.7331, "step": 16577 }, { "epoch": 0.5855648618696714, "grad_norm": 1.5786601305007935, "learning_rate": 3.867328760580525e-06, "loss": 0.7948, "step": 16578 }, { "epoch": 0.5856001836733793, "grad_norm": 1.5370047092437744, "learning_rate": 3.866771628541668e-06, "loss": 0.7513, "step": 16579 }, { "epoch": 0.5856355054770872, "grad_norm": 1.6291009187698364, "learning_rate": 3.866214511334292e-06, "loss": 0.7702, "step": 16580 }, { "epoch": 0.5856708272807951, "grad_norm": 1.6914093494415283, "learning_rate": 3.865657408965688e-06, "loss": 0.7808, "step": 16581 }, { "epoch": 0.585706149084503, "grad_norm": 1.9380838871002197, "learning_rate": 3.865100321443143e-06, "loss": 0.7799, "step": 16582 }, { "epoch": 0.5857414708882109, "grad_norm": 1.5958882570266724, "learning_rate": 3.864543248773955e-06, "loss": 0.8057, "step": 16583 }, { "epoch": 0.5857767926919188, "grad_norm": 1.6478804349899292, "learning_rate": 3.863986190965411e-06, "loss": 0.8265, "step": 16584 }, { "epoch": 0.5858121144956268, "grad_norm": 1.5007834434509277, "learning_rate": 3.863429148024801e-06, "loss": 0.7381, "step": 16585 }, { "epoch": 0.5858474362993347, "grad_norm": 1.6810665130615234, "learning_rate": 3.862872119959417e-06, "loss": 0.7823, "step": 16586 }, { "epoch": 0.5858827581030426, "grad_norm": 1.767783522605896, "learning_rate": 3.86231510677655e-06, "loss": 0.815, "step": 16587 }, { "epoch": 0.5859180799067505, "grad_norm": 1.7308094501495361, "learning_rate": 3.861758108483487e-06, "loss": 0.783, "step": 16588 }, { "epoch": 0.5859534017104584, "grad_norm": 1.8342894315719604, "learning_rate": 3.861201125087522e-06, "loss": 0.7826, "step": 16589 }, { "epoch": 0.5859887235141662, "grad_norm": 1.7183974981307983, "learning_rate": 3.8606441565959415e-06, "loss": 0.7719, "step": 16590 }, { "epoch": 0.5860240453178741, "grad_norm": 1.7188466787338257, "learning_rate": 3.860087203016037e-06, "loss": 0.8098, "step": 16591 }, { "epoch": 0.586059367121582, "grad_norm": 1.7019520998001099, "learning_rate": 3.859530264355097e-06, "loss": 0.7833, "step": 16592 }, { "epoch": 0.5860946889252899, "grad_norm": 1.5847034454345703, "learning_rate": 3.8589733406204085e-06, "loss": 0.7634, "step": 16593 }, { "epoch": 0.5861300107289978, "grad_norm": 1.5954598188400269, "learning_rate": 3.8584164318192665e-06, "loss": 0.7747, "step": 16594 }, { "epoch": 0.5861653325327058, "grad_norm": 1.7536311149597168, "learning_rate": 3.857859537958952e-06, "loss": 0.8392, "step": 16595 }, { "epoch": 0.5862006543364137, "grad_norm": 1.7861422300338745, "learning_rate": 3.857302659046759e-06, "loss": 0.7942, "step": 16596 }, { "epoch": 0.5862359761401216, "grad_norm": 1.9183988571166992, "learning_rate": 3.856745795089974e-06, "loss": 0.7838, "step": 16597 }, { "epoch": 0.5862712979438295, "grad_norm": 1.640439748764038, "learning_rate": 3.856188946095882e-06, "loss": 0.7756, "step": 16598 }, { "epoch": 0.5863066197475374, "grad_norm": 1.6365127563476562, "learning_rate": 3.855632112071777e-06, "loss": 0.754, "step": 16599 }, { "epoch": 0.5863419415512453, "grad_norm": 1.9750242233276367, "learning_rate": 3.855075293024942e-06, "loss": 0.7745, "step": 16600 }, { "epoch": 0.5863772633549532, "grad_norm": 1.582003116607666, "learning_rate": 3.8545184889626685e-06, "loss": 0.8087, "step": 16601 }, { "epoch": 0.5864125851586611, "grad_norm": 1.7684452533721924, "learning_rate": 3.853961699892241e-06, "loss": 0.7422, "step": 16602 }, { "epoch": 0.586447906962369, "grad_norm": 1.570104956626892, "learning_rate": 3.853404925820946e-06, "loss": 0.7757, "step": 16603 }, { "epoch": 0.586483228766077, "grad_norm": 1.6278071403503418, "learning_rate": 3.852848166756073e-06, "loss": 0.7511, "step": 16604 }, { "epoch": 0.5865185505697849, "grad_norm": 1.6397405862808228, "learning_rate": 3.8522914227049076e-06, "loss": 0.7879, "step": 16605 }, { "epoch": 0.5865538723734928, "grad_norm": 1.7618597745895386, "learning_rate": 3.851734693674736e-06, "loss": 0.7619, "step": 16606 }, { "epoch": 0.5865891941772007, "grad_norm": 1.759861946105957, "learning_rate": 3.851177979672844e-06, "loss": 0.8276, "step": 16607 }, { "epoch": 0.5866245159809086, "grad_norm": 1.7889606952667236, "learning_rate": 3.8506212807065205e-06, "loss": 0.8168, "step": 16608 }, { "epoch": 0.5866598377846165, "grad_norm": 1.5768402814865112, "learning_rate": 3.850064596783048e-06, "loss": 0.7846, "step": 16609 }, { "epoch": 0.5866951595883244, "grad_norm": 1.5383553504943848, "learning_rate": 3.849507927909715e-06, "loss": 0.7703, "step": 16610 }, { "epoch": 0.5867304813920323, "grad_norm": 1.5146987438201904, "learning_rate": 3.8489512740938055e-06, "loss": 0.7912, "step": 16611 }, { "epoch": 0.5867658031957402, "grad_norm": 1.7828847169876099, "learning_rate": 3.8483946353426075e-06, "loss": 0.7978, "step": 16612 }, { "epoch": 0.5868011249994481, "grad_norm": 1.7483923435211182, "learning_rate": 3.847838011663401e-06, "loss": 0.7952, "step": 16613 }, { "epoch": 0.586836446803156, "grad_norm": 1.8577637672424316, "learning_rate": 3.847281403063475e-06, "loss": 0.7684, "step": 16614 }, { "epoch": 0.586871768606864, "grad_norm": 1.6293504238128662, "learning_rate": 3.846724809550114e-06, "loss": 0.797, "step": 16615 }, { "epoch": 0.5869070904105718, "grad_norm": 1.6833902597427368, "learning_rate": 3.8461682311306e-06, "loss": 0.7755, "step": 16616 }, { "epoch": 0.5869424122142797, "grad_norm": 1.709531545639038, "learning_rate": 3.845611667812221e-06, "loss": 0.8188, "step": 16617 }, { "epoch": 0.5869777340179876, "grad_norm": 1.6084914207458496, "learning_rate": 3.845055119602259e-06, "loss": 0.7802, "step": 16618 }, { "epoch": 0.5870130558216955, "grad_norm": 0.9178575277328491, "learning_rate": 3.844498586507998e-06, "loss": 0.5912, "step": 16619 }, { "epoch": 0.5870483776254034, "grad_norm": 1.7062243223190308, "learning_rate": 3.843942068536723e-06, "loss": 0.7927, "step": 16620 }, { "epoch": 0.5870836994291113, "grad_norm": 1.6354767084121704, "learning_rate": 3.843385565695717e-06, "loss": 0.7941, "step": 16621 }, { "epoch": 0.5871190212328192, "grad_norm": 1.5598278045654297, "learning_rate": 3.842829077992262e-06, "loss": 0.7714, "step": 16622 }, { "epoch": 0.5871543430365271, "grad_norm": 1.8693032264709473, "learning_rate": 3.842272605433644e-06, "loss": 0.7959, "step": 16623 }, { "epoch": 0.587189664840235, "grad_norm": 1.8866841793060303, "learning_rate": 3.841716148027143e-06, "loss": 0.8372, "step": 16624 }, { "epoch": 0.587224986643943, "grad_norm": 1.8287522792816162, "learning_rate": 3.841159705780044e-06, "loss": 0.7905, "step": 16625 }, { "epoch": 0.5872603084476509, "grad_norm": 1.7183887958526611, "learning_rate": 3.840603278699629e-06, "loss": 0.8077, "step": 16626 }, { "epoch": 0.5872956302513588, "grad_norm": 1.4710205793380737, "learning_rate": 3.84004686679318e-06, "loss": 0.7593, "step": 16627 }, { "epoch": 0.5873309520550667, "grad_norm": 1.597076654434204, "learning_rate": 3.839490470067981e-06, "loss": 0.7755, "step": 16628 }, { "epoch": 0.5873662738587746, "grad_norm": 1.637966513633728, "learning_rate": 3.838934088531312e-06, "loss": 0.8145, "step": 16629 }, { "epoch": 0.5874015956624825, "grad_norm": 1.6397864818572998, "learning_rate": 3.8383777221904535e-06, "loss": 0.7615, "step": 16630 }, { "epoch": 0.5874369174661904, "grad_norm": 1.8502333164215088, "learning_rate": 3.8378213710526935e-06, "loss": 0.7715, "step": 16631 }, { "epoch": 0.5874722392698983, "grad_norm": 1.9464075565338135, "learning_rate": 3.837265035125306e-06, "loss": 0.7999, "step": 16632 }, { "epoch": 0.5875075610736062, "grad_norm": 1.8941868543624878, "learning_rate": 3.836708714415575e-06, "loss": 0.7797, "step": 16633 }, { "epoch": 0.5875428828773142, "grad_norm": 1.792702078819275, "learning_rate": 3.836152408930781e-06, "loss": 0.8331, "step": 16634 }, { "epoch": 0.5875782046810221, "grad_norm": 1.7099530696868896, "learning_rate": 3.8355961186782064e-06, "loss": 0.7667, "step": 16635 }, { "epoch": 0.58761352648473, "grad_norm": 1.5552887916564941, "learning_rate": 3.835039843665131e-06, "loss": 0.7385, "step": 16636 }, { "epoch": 0.5876488482884379, "grad_norm": 1.6923770904541016, "learning_rate": 3.834483583898835e-06, "loss": 0.7661, "step": 16637 }, { "epoch": 0.5876841700921458, "grad_norm": 1.6138509511947632, "learning_rate": 3.8339273393865986e-06, "loss": 0.7601, "step": 16638 }, { "epoch": 0.5877194918958537, "grad_norm": 1.7488583326339722, "learning_rate": 3.833371110135702e-06, "loss": 0.8212, "step": 16639 }, { "epoch": 0.5877548136995616, "grad_norm": 1.776186466217041, "learning_rate": 3.832814896153425e-06, "loss": 0.7704, "step": 16640 }, { "epoch": 0.5877901355032695, "grad_norm": 1.5629090070724487, "learning_rate": 3.832258697447048e-06, "loss": 0.7735, "step": 16641 }, { "epoch": 0.5878254573069773, "grad_norm": 1.671754002571106, "learning_rate": 3.831702514023849e-06, "loss": 0.7508, "step": 16642 }, { "epoch": 0.5878607791106852, "grad_norm": 1.64249849319458, "learning_rate": 3.831146345891107e-06, "loss": 0.7486, "step": 16643 }, { "epoch": 0.5878961009143931, "grad_norm": 1.6042449474334717, "learning_rate": 3.830590193056103e-06, "loss": 0.7971, "step": 16644 }, { "epoch": 0.5879314227181011, "grad_norm": 1.6751933097839355, "learning_rate": 3.830034055526115e-06, "loss": 0.7672, "step": 16645 }, { "epoch": 0.587966744521809, "grad_norm": 1.6547073125839233, "learning_rate": 3.82947793330842e-06, "loss": 0.7652, "step": 16646 }, { "epoch": 0.5880020663255169, "grad_norm": 1.902750849723816, "learning_rate": 3.828921826410299e-06, "loss": 0.8008, "step": 16647 }, { "epoch": 0.5880373881292248, "grad_norm": 1.6034742593765259, "learning_rate": 3.828365734839028e-06, "loss": 0.7954, "step": 16648 }, { "epoch": 0.5880727099329327, "grad_norm": 1.7730321884155273, "learning_rate": 3.827809658601889e-06, "loss": 0.7919, "step": 16649 }, { "epoch": 0.5881080317366406, "grad_norm": 1.519610047340393, "learning_rate": 3.8272535977061535e-06, "loss": 0.7884, "step": 16650 }, { "epoch": 0.5881433535403485, "grad_norm": 1.5324125289916992, "learning_rate": 3.826697552159104e-06, "loss": 0.7296, "step": 16651 }, { "epoch": 0.5881786753440564, "grad_norm": 1.8295377492904663, "learning_rate": 3.826141521968016e-06, "loss": 0.7853, "step": 16652 }, { "epoch": 0.5882139971477643, "grad_norm": 1.632610559463501, "learning_rate": 3.825585507140166e-06, "loss": 0.8132, "step": 16653 }, { "epoch": 0.5882493189514723, "grad_norm": 1.8406533002853394, "learning_rate": 3.825029507682833e-06, "loss": 0.7779, "step": 16654 }, { "epoch": 0.5882846407551802, "grad_norm": 1.5817484855651855, "learning_rate": 3.8244735236032935e-06, "loss": 0.7608, "step": 16655 }, { "epoch": 0.5883199625588881, "grad_norm": 1.6400314569473267, "learning_rate": 3.823917554908822e-06, "loss": 0.81, "step": 16656 }, { "epoch": 0.588355284362596, "grad_norm": 1.5610212087631226, "learning_rate": 3.823361601606699e-06, "loss": 0.7773, "step": 16657 }, { "epoch": 0.5883906061663039, "grad_norm": 1.6617575883865356, "learning_rate": 3.822805663704195e-06, "loss": 0.7937, "step": 16658 }, { "epoch": 0.5884259279700118, "grad_norm": 2.078418016433716, "learning_rate": 3.8222497412085915e-06, "loss": 0.7854, "step": 16659 }, { "epoch": 0.5884612497737197, "grad_norm": 1.718144416809082, "learning_rate": 3.821693834127162e-06, "loss": 0.8001, "step": 16660 }, { "epoch": 0.5884965715774276, "grad_norm": 1.6616363525390625, "learning_rate": 3.8211379424671805e-06, "loss": 0.7861, "step": 16661 }, { "epoch": 0.5885318933811355, "grad_norm": 1.6513649225234985, "learning_rate": 3.820582066235925e-06, "loss": 0.7854, "step": 16662 }, { "epoch": 0.5885672151848435, "grad_norm": 1.6990740299224854, "learning_rate": 3.82002620544067e-06, "loss": 0.7777, "step": 16663 }, { "epoch": 0.5886025369885514, "grad_norm": 1.7665990591049194, "learning_rate": 3.81947036008869e-06, "loss": 0.7847, "step": 16664 }, { "epoch": 0.5886378587922593, "grad_norm": 1.617897629737854, "learning_rate": 3.8189145301872595e-06, "loss": 0.813, "step": 16665 }, { "epoch": 0.5886731805959672, "grad_norm": 1.5887757539749146, "learning_rate": 3.818358715743654e-06, "loss": 0.8531, "step": 16666 }, { "epoch": 0.5887085023996751, "grad_norm": 1.79500412940979, "learning_rate": 3.817802916765148e-06, "loss": 0.8157, "step": 16667 }, { "epoch": 0.5887438242033829, "grad_norm": 1.6647917032241821, "learning_rate": 3.817247133259015e-06, "loss": 0.7531, "step": 16668 }, { "epoch": 0.5887791460070908, "grad_norm": 1.8126634359359741, "learning_rate": 3.81669136523253e-06, "loss": 0.7941, "step": 16669 }, { "epoch": 0.5888144678107987, "grad_norm": 1.5237419605255127, "learning_rate": 3.816135612692965e-06, "loss": 0.7389, "step": 16670 }, { "epoch": 0.5888497896145066, "grad_norm": 1.7452261447906494, "learning_rate": 3.815579875647593e-06, "loss": 0.8154, "step": 16671 }, { "epoch": 0.5888851114182145, "grad_norm": 1.8017209768295288, "learning_rate": 3.815024154103691e-06, "loss": 0.7554, "step": 16672 }, { "epoch": 0.5889204332219224, "grad_norm": 1.6635984182357788, "learning_rate": 3.81446844806853e-06, "loss": 0.7537, "step": 16673 }, { "epoch": 0.5889557550256304, "grad_norm": 2.0467729568481445, "learning_rate": 3.813912757549382e-06, "loss": 0.7609, "step": 16674 }, { "epoch": 0.5889910768293383, "grad_norm": 1.7145497798919678, "learning_rate": 3.8133570825535214e-06, "loss": 0.8116, "step": 16675 }, { "epoch": 0.5890263986330462, "grad_norm": 1.7018226385116577, "learning_rate": 3.8128014230882205e-06, "loss": 0.7426, "step": 16676 }, { "epoch": 0.5890617204367541, "grad_norm": 1.6354727745056152, "learning_rate": 3.812245779160751e-06, "loss": 0.7601, "step": 16677 }, { "epoch": 0.589097042240462, "grad_norm": 1.5723850727081299, "learning_rate": 3.8116901507783855e-06, "loss": 0.789, "step": 16678 }, { "epoch": 0.5891323640441699, "grad_norm": 1.517693281173706, "learning_rate": 3.8111345379483966e-06, "loss": 0.7847, "step": 16679 }, { "epoch": 0.5891676858478778, "grad_norm": 1.6568222045898438, "learning_rate": 3.810578940678054e-06, "loss": 0.7951, "step": 16680 }, { "epoch": 0.5892030076515857, "grad_norm": 1.6007353067398071, "learning_rate": 3.8100233589746323e-06, "loss": 0.7822, "step": 16681 }, { "epoch": 0.5892383294552936, "grad_norm": 1.6387923955917358, "learning_rate": 3.809467792845399e-06, "loss": 0.7637, "step": 16682 }, { "epoch": 0.5892736512590016, "grad_norm": 1.611816167831421, "learning_rate": 3.8089122422976294e-06, "loss": 0.7706, "step": 16683 }, { "epoch": 0.5893089730627095, "grad_norm": 1.5519599914550781, "learning_rate": 3.8083567073385925e-06, "loss": 0.7737, "step": 16684 }, { "epoch": 0.5893442948664174, "grad_norm": 1.6105806827545166, "learning_rate": 3.807801187975557e-06, "loss": 0.7665, "step": 16685 }, { "epoch": 0.5893796166701253, "grad_norm": 1.6952433586120605, "learning_rate": 3.8072456842158e-06, "loss": 0.7791, "step": 16686 }, { "epoch": 0.5894149384738332, "grad_norm": 1.7004978656768799, "learning_rate": 3.806690196066583e-06, "loss": 0.772, "step": 16687 }, { "epoch": 0.5894502602775411, "grad_norm": 1.810971975326538, "learning_rate": 3.806134723535182e-06, "loss": 0.8316, "step": 16688 }, { "epoch": 0.589485582081249, "grad_norm": 1.6502940654754639, "learning_rate": 3.8055792666288648e-06, "loss": 0.7832, "step": 16689 }, { "epoch": 0.5895209038849569, "grad_norm": 1.520345687866211, "learning_rate": 3.8050238253549012e-06, "loss": 0.777, "step": 16690 }, { "epoch": 0.5895562256886648, "grad_norm": 1.6148959398269653, "learning_rate": 3.8044683997205624e-06, "loss": 0.8052, "step": 16691 }, { "epoch": 0.5895915474923727, "grad_norm": 1.5659542083740234, "learning_rate": 3.8039129897331143e-06, "loss": 0.7704, "step": 16692 }, { "epoch": 0.5896268692960807, "grad_norm": 1.6434510946273804, "learning_rate": 3.8033575953998304e-06, "loss": 0.75, "step": 16693 }, { "epoch": 0.5896621910997885, "grad_norm": 1.6488009691238403, "learning_rate": 3.8028022167279764e-06, "loss": 0.7965, "step": 16694 }, { "epoch": 0.5896975129034964, "grad_norm": 1.7687757015228271, "learning_rate": 3.8022468537248216e-06, "loss": 0.8051, "step": 16695 }, { "epoch": 0.5897328347072043, "grad_norm": 1.9409739971160889, "learning_rate": 3.8016915063976357e-06, "loss": 0.7998, "step": 16696 }, { "epoch": 0.5897681565109122, "grad_norm": 1.678562879562378, "learning_rate": 3.8011361747536857e-06, "loss": 0.7917, "step": 16697 }, { "epoch": 0.5898034783146201, "grad_norm": 1.717572808265686, "learning_rate": 3.800580858800239e-06, "loss": 0.7861, "step": 16698 }, { "epoch": 0.589838800118328, "grad_norm": 1.6352958679199219, "learning_rate": 3.800025558544566e-06, "loss": 0.7862, "step": 16699 }, { "epoch": 0.5898741219220359, "grad_norm": 1.629634976387024, "learning_rate": 3.7994702739939324e-06, "loss": 0.7876, "step": 16700 }, { "epoch": 0.5899094437257438, "grad_norm": 1.61614191532135, "learning_rate": 3.7989150051556057e-06, "loss": 0.7941, "step": 16701 }, { "epoch": 0.5899447655294517, "grad_norm": 1.7770990133285522, "learning_rate": 3.798359752036855e-06, "loss": 0.7899, "step": 16702 }, { "epoch": 0.5899800873331597, "grad_norm": 2.4591286182403564, "learning_rate": 3.7978045146449457e-06, "loss": 0.7795, "step": 16703 }, { "epoch": 0.5900154091368676, "grad_norm": 1.5576364994049072, "learning_rate": 3.797249292987143e-06, "loss": 0.7433, "step": 16704 }, { "epoch": 0.5900507309405755, "grad_norm": 1.6919269561767578, "learning_rate": 3.7966940870707186e-06, "loss": 0.7864, "step": 16705 }, { "epoch": 0.5900860527442834, "grad_norm": 1.7597227096557617, "learning_rate": 3.7961388969029346e-06, "loss": 0.7713, "step": 16706 }, { "epoch": 0.5901213745479913, "grad_norm": 1.5823166370391846, "learning_rate": 3.795583722491058e-06, "loss": 0.7797, "step": 16707 }, { "epoch": 0.5901566963516992, "grad_norm": 1.6372374296188354, "learning_rate": 3.7950285638423544e-06, "loss": 0.8069, "step": 16708 }, { "epoch": 0.5901920181554071, "grad_norm": 1.8362045288085938, "learning_rate": 3.7944734209640916e-06, "loss": 0.7979, "step": 16709 }, { "epoch": 0.590227339959115, "grad_norm": 1.8487144708633423, "learning_rate": 3.793918293863534e-06, "loss": 0.7873, "step": 16710 }, { "epoch": 0.5902626617628229, "grad_norm": 1.7737261056900024, "learning_rate": 3.7933631825479456e-06, "loss": 0.7892, "step": 16711 }, { "epoch": 0.5902979835665308, "grad_norm": 1.6879757642745972, "learning_rate": 3.792808087024594e-06, "loss": 0.7959, "step": 16712 }, { "epoch": 0.5903333053702388, "grad_norm": 1.6568659543991089, "learning_rate": 3.792253007300744e-06, "loss": 0.8301, "step": 16713 }, { "epoch": 0.5903686271739467, "grad_norm": 1.6485087871551514, "learning_rate": 3.791697943383658e-06, "loss": 0.7556, "step": 16714 }, { "epoch": 0.5904039489776546, "grad_norm": 1.826029896736145, "learning_rate": 3.791142895280604e-06, "loss": 0.7648, "step": 16715 }, { "epoch": 0.5904392707813625, "grad_norm": 1.683556318283081, "learning_rate": 3.7905878629988424e-06, "loss": 0.8, "step": 16716 }, { "epoch": 0.5904745925850704, "grad_norm": 1.5368664264678955, "learning_rate": 3.7900328465456413e-06, "loss": 0.7664, "step": 16717 }, { "epoch": 0.5905099143887783, "grad_norm": 1.5578458309173584, "learning_rate": 3.789477845928263e-06, "loss": 0.7936, "step": 16718 }, { "epoch": 0.5905452361924862, "grad_norm": 1.8172074556350708, "learning_rate": 3.7889228611539695e-06, "loss": 0.8022, "step": 16719 }, { "epoch": 0.590580557996194, "grad_norm": 1.6252256631851196, "learning_rate": 3.788367892230027e-06, "loss": 0.7698, "step": 16720 }, { "epoch": 0.5906158797999019, "grad_norm": 1.9591838121414185, "learning_rate": 3.787812939163698e-06, "loss": 0.804, "step": 16721 }, { "epoch": 0.5906512016036098, "grad_norm": 1.9103118181228638, "learning_rate": 3.787258001962244e-06, "loss": 0.8073, "step": 16722 }, { "epoch": 0.5906865234073178, "grad_norm": 1.738632082939148, "learning_rate": 3.786703080632933e-06, "loss": 0.7915, "step": 16723 }, { "epoch": 0.5907218452110257, "grad_norm": 1.6215606927871704, "learning_rate": 3.786148175183021e-06, "loss": 0.7632, "step": 16724 }, { "epoch": 0.5907571670147336, "grad_norm": 1.8409371376037598, "learning_rate": 3.7855932856197743e-06, "loss": 0.732, "step": 16725 }, { "epoch": 0.5907924888184415, "grad_norm": 1.4623361825942993, "learning_rate": 3.7850384119504526e-06, "loss": 0.7729, "step": 16726 }, { "epoch": 0.5908278106221494, "grad_norm": 1.6416865587234497, "learning_rate": 3.7844835541823215e-06, "loss": 0.7415, "step": 16727 }, { "epoch": 0.5908631324258573, "grad_norm": 1.7238432168960571, "learning_rate": 3.783928712322641e-06, "loss": 0.8085, "step": 16728 }, { "epoch": 0.5908984542295652, "grad_norm": 1.5534168481826782, "learning_rate": 3.7833738863786716e-06, "loss": 0.7846, "step": 16729 }, { "epoch": 0.5909337760332731, "grad_norm": 1.7198634147644043, "learning_rate": 3.7828190763576776e-06, "loss": 0.8142, "step": 16730 }, { "epoch": 0.590969097836981, "grad_norm": 1.7149391174316406, "learning_rate": 3.7822642822669187e-06, "loss": 0.8209, "step": 16731 }, { "epoch": 0.591004419640689, "grad_norm": 1.795768141746521, "learning_rate": 3.7817095041136548e-06, "loss": 0.8041, "step": 16732 }, { "epoch": 0.5910397414443969, "grad_norm": 1.597698450088501, "learning_rate": 3.781154741905149e-06, "loss": 0.7846, "step": 16733 }, { "epoch": 0.5910750632481048, "grad_norm": 1.6302777528762817, "learning_rate": 3.7805999956486607e-06, "loss": 0.7947, "step": 16734 }, { "epoch": 0.5911103850518127, "grad_norm": 1.8434935808181763, "learning_rate": 3.7800452653514496e-06, "loss": 0.7504, "step": 16735 }, { "epoch": 0.5911457068555206, "grad_norm": 2.2268755435943604, "learning_rate": 3.779490551020778e-06, "loss": 0.8066, "step": 16736 }, { "epoch": 0.5911810286592285, "grad_norm": 1.6749037504196167, "learning_rate": 3.778935852663904e-06, "loss": 0.7638, "step": 16737 }, { "epoch": 0.5912163504629364, "grad_norm": 1.5646573305130005, "learning_rate": 3.7783811702880867e-06, "loss": 0.7721, "step": 16738 }, { "epoch": 0.5912516722666443, "grad_norm": 1.7913578748703003, "learning_rate": 3.777826503900589e-06, "loss": 0.8224, "step": 16739 }, { "epoch": 0.5912869940703522, "grad_norm": 1.8155778646469116, "learning_rate": 3.777271853508667e-06, "loss": 0.7944, "step": 16740 }, { "epoch": 0.5913223158740601, "grad_norm": 1.639283299446106, "learning_rate": 3.776717219119582e-06, "loss": 0.7846, "step": 16741 }, { "epoch": 0.591357637677768, "grad_norm": 1.6922937631607056, "learning_rate": 3.7761626007405933e-06, "loss": 0.7594, "step": 16742 }, { "epoch": 0.591392959481476, "grad_norm": 1.6788840293884277, "learning_rate": 3.7756079983789574e-06, "loss": 0.7854, "step": 16743 }, { "epoch": 0.5914282812851839, "grad_norm": 1.742253065109253, "learning_rate": 3.7750534120419337e-06, "loss": 0.8086, "step": 16744 }, { "epoch": 0.5914636030888918, "grad_norm": 1.6347017288208008, "learning_rate": 3.774498841736779e-06, "loss": 0.7559, "step": 16745 }, { "epoch": 0.5914989248925996, "grad_norm": 1.6997677087783813, "learning_rate": 3.773944287470755e-06, "loss": 0.7714, "step": 16746 }, { "epoch": 0.5915342466963075, "grad_norm": 1.5242289304733276, "learning_rate": 3.7733897492511175e-06, "loss": 0.7739, "step": 16747 }, { "epoch": 0.5915695685000154, "grad_norm": 1.6278423070907593, "learning_rate": 3.772835227085123e-06, "loss": 0.7561, "step": 16748 }, { "epoch": 0.5916048903037233, "grad_norm": 1.6949548721313477, "learning_rate": 3.7722807209800317e-06, "loss": 0.7625, "step": 16749 }, { "epoch": 0.5916402121074312, "grad_norm": 1.7504403591156006, "learning_rate": 3.7717262309430987e-06, "loss": 0.7895, "step": 16750 }, { "epoch": 0.5916755339111391, "grad_norm": 1.5838350057601929, "learning_rate": 3.7711717569815825e-06, "loss": 0.7879, "step": 16751 }, { "epoch": 0.591710855714847, "grad_norm": 1.934046983718872, "learning_rate": 3.7706172991027397e-06, "loss": 0.7991, "step": 16752 }, { "epoch": 0.591746177518555, "grad_norm": 1.6780672073364258, "learning_rate": 3.7700628573138252e-06, "loss": 0.803, "step": 16753 }, { "epoch": 0.5917814993222629, "grad_norm": 1.8301531076431274, "learning_rate": 3.769508431622098e-06, "loss": 0.8305, "step": 16754 }, { "epoch": 0.5918168211259708, "grad_norm": 1.707611083984375, "learning_rate": 3.7689540220348136e-06, "loss": 0.8344, "step": 16755 }, { "epoch": 0.5918521429296787, "grad_norm": 1.6362296342849731, "learning_rate": 3.768399628559226e-06, "loss": 0.7721, "step": 16756 }, { "epoch": 0.5918874647333866, "grad_norm": 1.7256146669387817, "learning_rate": 3.767845251202594e-06, "loss": 0.8051, "step": 16757 }, { "epoch": 0.5919227865370945, "grad_norm": 1.584488034248352, "learning_rate": 3.7672908899721713e-06, "loss": 0.7422, "step": 16758 }, { "epoch": 0.5919581083408024, "grad_norm": 1.6589126586914062, "learning_rate": 3.7667365448752123e-06, "loss": 0.751, "step": 16759 }, { "epoch": 0.5919934301445103, "grad_norm": 1.5489765405654907, "learning_rate": 3.7661822159189765e-06, "loss": 0.7592, "step": 16760 }, { "epoch": 0.5920287519482182, "grad_norm": 1.6775355339050293, "learning_rate": 3.7656279031107145e-06, "loss": 0.7643, "step": 16761 }, { "epoch": 0.5920640737519262, "grad_norm": 1.496048927307129, "learning_rate": 3.7650736064576827e-06, "loss": 0.7959, "step": 16762 }, { "epoch": 0.5920993955556341, "grad_norm": 1.5689893960952759, "learning_rate": 3.7645193259671344e-06, "loss": 0.7735, "step": 16763 }, { "epoch": 0.592134717359342, "grad_norm": 1.6556572914123535, "learning_rate": 3.763965061646325e-06, "loss": 0.8358, "step": 16764 }, { "epoch": 0.5921700391630499, "grad_norm": 1.9857069253921509, "learning_rate": 3.76341081350251e-06, "loss": 0.7973, "step": 16765 }, { "epoch": 0.5922053609667578, "grad_norm": 0.8981993794441223, "learning_rate": 3.7628565815429396e-06, "loss": 0.5538, "step": 16766 }, { "epoch": 0.5922406827704657, "grad_norm": 1.6633778810501099, "learning_rate": 3.762302365774871e-06, "loss": 0.7697, "step": 16767 }, { "epoch": 0.5922760045741736, "grad_norm": 1.7127333879470825, "learning_rate": 3.761748166205557e-06, "loss": 0.8353, "step": 16768 }, { "epoch": 0.5923113263778815, "grad_norm": 1.8532578945159912, "learning_rate": 3.761193982842249e-06, "loss": 0.7821, "step": 16769 }, { "epoch": 0.5923466481815894, "grad_norm": 2.053281307220459, "learning_rate": 3.760639815692202e-06, "loss": 0.7807, "step": 16770 }, { "epoch": 0.5923819699852974, "grad_norm": 1.7117208242416382, "learning_rate": 3.7600856647626683e-06, "loss": 0.7868, "step": 16771 }, { "epoch": 0.5924172917890052, "grad_norm": 1.8545607328414917, "learning_rate": 3.759531530060899e-06, "loss": 0.7962, "step": 16772 }, { "epoch": 0.5924526135927131, "grad_norm": 2.3406341075897217, "learning_rate": 3.758977411594149e-06, "loss": 0.7717, "step": 16773 }, { "epoch": 0.592487935396421, "grad_norm": 1.6966309547424316, "learning_rate": 3.7584233093696685e-06, "loss": 0.7873, "step": 16774 }, { "epoch": 0.5925232572001289, "grad_norm": 1.4620320796966553, "learning_rate": 3.7578692233947113e-06, "loss": 0.7837, "step": 16775 }, { "epoch": 0.5925585790038368, "grad_norm": 1.546371340751648, "learning_rate": 3.757315153676528e-06, "loss": 0.7777, "step": 16776 }, { "epoch": 0.5925939008075447, "grad_norm": 1.483764410018921, "learning_rate": 3.7567611002223695e-06, "loss": 0.7702, "step": 16777 }, { "epoch": 0.5926292226112526, "grad_norm": 1.614927887916565, "learning_rate": 3.756207063039489e-06, "loss": 0.8051, "step": 16778 }, { "epoch": 0.5926645444149605, "grad_norm": 1.646253228187561, "learning_rate": 3.7556530421351378e-06, "loss": 0.7648, "step": 16779 }, { "epoch": 0.5926998662186684, "grad_norm": 1.5796436071395874, "learning_rate": 3.755099037516565e-06, "loss": 0.7523, "step": 16780 }, { "epoch": 0.5927351880223763, "grad_norm": 1.5981614589691162, "learning_rate": 3.7545450491910212e-06, "loss": 0.8024, "step": 16781 }, { "epoch": 0.5927705098260843, "grad_norm": 1.7452627420425415, "learning_rate": 3.7539910771657573e-06, "loss": 0.8016, "step": 16782 }, { "epoch": 0.5928058316297922, "grad_norm": 1.5501261949539185, "learning_rate": 3.7534371214480247e-06, "loss": 0.8084, "step": 16783 }, { "epoch": 0.5928411534335001, "grad_norm": 1.814013957977295, "learning_rate": 3.7528831820450718e-06, "loss": 0.8317, "step": 16784 }, { "epoch": 0.592876475237208, "grad_norm": 1.755807876586914, "learning_rate": 3.75232925896415e-06, "loss": 0.7739, "step": 16785 }, { "epoch": 0.5929117970409159, "grad_norm": 1.599877953529358, "learning_rate": 3.751775352212509e-06, "loss": 0.7904, "step": 16786 }, { "epoch": 0.5929471188446238, "grad_norm": 1.490555763244629, "learning_rate": 3.751221461797396e-06, "loss": 0.7608, "step": 16787 }, { "epoch": 0.5929824406483317, "grad_norm": 1.6786099672317505, "learning_rate": 3.750667587726063e-06, "loss": 0.7893, "step": 16788 }, { "epoch": 0.5930177624520396, "grad_norm": 1.529989242553711, "learning_rate": 3.7501137300057576e-06, "loss": 0.7402, "step": 16789 }, { "epoch": 0.5930530842557475, "grad_norm": 1.864780068397522, "learning_rate": 3.749559888643728e-06, "loss": 0.7899, "step": 16790 }, { "epoch": 0.5930884060594555, "grad_norm": 1.4774000644683838, "learning_rate": 3.7490060636472243e-06, "loss": 0.7296, "step": 16791 }, { "epoch": 0.5931237278631634, "grad_norm": 1.9234395027160645, "learning_rate": 3.7484522550234936e-06, "loss": 0.8374, "step": 16792 }, { "epoch": 0.5931590496668713, "grad_norm": 1.6330175399780273, "learning_rate": 3.7478984627797843e-06, "loss": 0.7811, "step": 16793 }, { "epoch": 0.5931943714705792, "grad_norm": 1.7339941263198853, "learning_rate": 3.7473446869233453e-06, "loss": 0.8171, "step": 16794 }, { "epoch": 0.5932296932742871, "grad_norm": 1.577746033668518, "learning_rate": 3.746790927461424e-06, "loss": 0.7688, "step": 16795 }, { "epoch": 0.593265015077995, "grad_norm": 1.6193314790725708, "learning_rate": 3.7462371844012658e-06, "loss": 0.834, "step": 16796 }, { "epoch": 0.5933003368817029, "grad_norm": 1.705108642578125, "learning_rate": 3.7456834577501222e-06, "loss": 0.7707, "step": 16797 }, { "epoch": 0.5933356586854107, "grad_norm": 1.6432621479034424, "learning_rate": 3.745129747515236e-06, "loss": 0.8089, "step": 16798 }, { "epoch": 0.5933709804891186, "grad_norm": 1.6941696405410767, "learning_rate": 3.744576053703857e-06, "loss": 0.7883, "step": 16799 }, { "epoch": 0.5934063022928265, "grad_norm": 1.6990941762924194, "learning_rate": 3.744022376323228e-06, "loss": 0.7844, "step": 16800 }, { "epoch": 0.5934416240965344, "grad_norm": 1.6490980386734009, "learning_rate": 3.7434687153806002e-06, "loss": 0.7846, "step": 16801 }, { "epoch": 0.5934769459002424, "grad_norm": 1.7986059188842773, "learning_rate": 3.7429150708832174e-06, "loss": 0.7945, "step": 16802 }, { "epoch": 0.5935122677039503, "grad_norm": 1.72225821018219, "learning_rate": 3.7423614428383247e-06, "loss": 0.7845, "step": 16803 }, { "epoch": 0.5935475895076582, "grad_norm": 0.883392870426178, "learning_rate": 3.74180783125317e-06, "loss": 0.5812, "step": 16804 }, { "epoch": 0.5935829113113661, "grad_norm": 1.7525969743728638, "learning_rate": 3.741254236134998e-06, "loss": 0.7595, "step": 16805 }, { "epoch": 0.593618233115074, "grad_norm": 1.6421033143997192, "learning_rate": 3.7407006574910527e-06, "loss": 0.8075, "step": 16806 }, { "epoch": 0.5936535549187819, "grad_norm": 1.6225738525390625, "learning_rate": 3.7401470953285817e-06, "loss": 0.789, "step": 16807 }, { "epoch": 0.5936888767224898, "grad_norm": 1.518190860748291, "learning_rate": 3.7395935496548276e-06, "loss": 0.7634, "step": 16808 }, { "epoch": 0.5937241985261977, "grad_norm": 1.8156648874282837, "learning_rate": 3.739040020477037e-06, "loss": 0.7939, "step": 16809 }, { "epoch": 0.5937595203299056, "grad_norm": 1.9961822032928467, "learning_rate": 3.738486507802454e-06, "loss": 0.763, "step": 16810 }, { "epoch": 0.5937948421336136, "grad_norm": 1.771794080734253, "learning_rate": 3.7379330116383217e-06, "loss": 0.8066, "step": 16811 }, { "epoch": 0.5938301639373215, "grad_norm": 1.8881971836090088, "learning_rate": 3.737379531991885e-06, "loss": 0.7819, "step": 16812 }, { "epoch": 0.5938654857410294, "grad_norm": 2.1249632835388184, "learning_rate": 3.7368260688703883e-06, "loss": 0.8413, "step": 16813 }, { "epoch": 0.5939008075447373, "grad_norm": 1.5838481187820435, "learning_rate": 3.736272622281073e-06, "loss": 0.7714, "step": 16814 }, { "epoch": 0.5939361293484452, "grad_norm": 1.6531258821487427, "learning_rate": 3.735719192231186e-06, "loss": 0.8044, "step": 16815 }, { "epoch": 0.5939714511521531, "grad_norm": 1.5889748334884644, "learning_rate": 3.735165778727969e-06, "loss": 0.7452, "step": 16816 }, { "epoch": 0.594006772955861, "grad_norm": 1.6796073913574219, "learning_rate": 3.7346123817786635e-06, "loss": 0.7922, "step": 16817 }, { "epoch": 0.5940420947595689, "grad_norm": 1.6377382278442383, "learning_rate": 3.734059001390512e-06, "loss": 0.7881, "step": 16818 }, { "epoch": 0.5940774165632768, "grad_norm": 1.5300424098968506, "learning_rate": 3.73350563757076e-06, "loss": 0.7497, "step": 16819 }, { "epoch": 0.5941127383669847, "grad_norm": 1.5726934671401978, "learning_rate": 3.732952290326648e-06, "loss": 0.7679, "step": 16820 }, { "epoch": 0.5941480601706927, "grad_norm": 1.7278767824172974, "learning_rate": 3.7323989596654166e-06, "loss": 0.7848, "step": 16821 }, { "epoch": 0.5941833819744006, "grad_norm": 1.995404601097107, "learning_rate": 3.731845645594311e-06, "loss": 0.7937, "step": 16822 }, { "epoch": 0.5942187037781085, "grad_norm": 1.6528823375701904, "learning_rate": 3.731292348120571e-06, "loss": 0.8206, "step": 16823 }, { "epoch": 0.5942540255818163, "grad_norm": 1.7918660640716553, "learning_rate": 3.7307390672514367e-06, "loss": 0.7976, "step": 16824 }, { "epoch": 0.5942893473855242, "grad_norm": 1.8366289138793945, "learning_rate": 3.730185802994153e-06, "loss": 0.7758, "step": 16825 }, { "epoch": 0.5943246691892321, "grad_norm": 1.7812249660491943, "learning_rate": 3.7296325553559574e-06, "loss": 0.7658, "step": 16826 }, { "epoch": 0.59435999099294, "grad_norm": 1.9578593969345093, "learning_rate": 3.7290793243440927e-06, "loss": 0.7702, "step": 16827 }, { "epoch": 0.5943953127966479, "grad_norm": 1.502310037612915, "learning_rate": 3.728526109965799e-06, "loss": 0.7564, "step": 16828 }, { "epoch": 0.5944306346003558, "grad_norm": 1.859391689300537, "learning_rate": 3.7279729122283166e-06, "loss": 0.7956, "step": 16829 }, { "epoch": 0.5944659564040637, "grad_norm": 1.578513264656067, "learning_rate": 3.727419731138885e-06, "loss": 0.7765, "step": 16830 }, { "epoch": 0.5945012782077717, "grad_norm": 1.705482006072998, "learning_rate": 3.726866566704745e-06, "loss": 0.7881, "step": 16831 }, { "epoch": 0.5945366000114796, "grad_norm": 1.650006651878357, "learning_rate": 3.726313418933135e-06, "loss": 0.7906, "step": 16832 }, { "epoch": 0.5945719218151875, "grad_norm": 1.6479679346084595, "learning_rate": 3.725760287831297e-06, "loss": 0.7986, "step": 16833 }, { "epoch": 0.5946072436188954, "grad_norm": 1.664738655090332, "learning_rate": 3.7252071734064704e-06, "loss": 0.7388, "step": 16834 }, { "epoch": 0.5946425654226033, "grad_norm": 1.566133975982666, "learning_rate": 3.724654075665891e-06, "loss": 0.7749, "step": 16835 }, { "epoch": 0.5946778872263112, "grad_norm": 1.730850338935852, "learning_rate": 3.7241009946167993e-06, "loss": 0.8239, "step": 16836 }, { "epoch": 0.5947132090300191, "grad_norm": 1.71833074092865, "learning_rate": 3.7235479302664334e-06, "loss": 0.8442, "step": 16837 }, { "epoch": 0.594748530833727, "grad_norm": 1.6480987071990967, "learning_rate": 3.722994882622033e-06, "loss": 0.8036, "step": 16838 }, { "epoch": 0.5947838526374349, "grad_norm": 1.557175874710083, "learning_rate": 3.7224418516908354e-06, "loss": 0.748, "step": 16839 }, { "epoch": 0.5948191744411429, "grad_norm": 1.7892062664031982, "learning_rate": 3.7218888374800775e-06, "loss": 0.7819, "step": 16840 }, { "epoch": 0.5948544962448508, "grad_norm": 1.8341443538665771, "learning_rate": 3.7213358399969996e-06, "loss": 0.8043, "step": 16841 }, { "epoch": 0.5948898180485587, "grad_norm": 1.05183744430542, "learning_rate": 3.720782859248837e-06, "loss": 0.5833, "step": 16842 }, { "epoch": 0.5949251398522666, "grad_norm": 1.6432559490203857, "learning_rate": 3.7202298952428284e-06, "loss": 0.781, "step": 16843 }, { "epoch": 0.5949604616559745, "grad_norm": 2.248396635055542, "learning_rate": 3.7196769479862107e-06, "loss": 0.7737, "step": 16844 }, { "epoch": 0.5949957834596824, "grad_norm": 1.771985650062561, "learning_rate": 3.7191240174862192e-06, "loss": 0.7964, "step": 16845 }, { "epoch": 0.5950311052633903, "grad_norm": 1.610904574394226, "learning_rate": 3.7185711037500927e-06, "loss": 0.7849, "step": 16846 }, { "epoch": 0.5950664270670982, "grad_norm": 2.3552026748657227, "learning_rate": 3.7180182067850666e-06, "loss": 0.8023, "step": 16847 }, { "epoch": 0.5951017488708061, "grad_norm": 1.6743634939193726, "learning_rate": 3.717465326598377e-06, "loss": 0.7758, "step": 16848 }, { "epoch": 0.595137070674514, "grad_norm": 1.796280026435852, "learning_rate": 3.7169124631972608e-06, "loss": 0.8064, "step": 16849 }, { "epoch": 0.5951723924782218, "grad_norm": 1.538996696472168, "learning_rate": 3.7163596165889528e-06, "loss": 0.7585, "step": 16850 }, { "epoch": 0.5952077142819298, "grad_norm": 1.651606559753418, "learning_rate": 3.715806786780688e-06, "loss": 0.8297, "step": 16851 }, { "epoch": 0.5952430360856377, "grad_norm": 1.6803815364837646, "learning_rate": 3.7152539737797045e-06, "loss": 0.774, "step": 16852 }, { "epoch": 0.5952783578893456, "grad_norm": 1.8196213245391846, "learning_rate": 3.7147011775932345e-06, "loss": 0.7804, "step": 16853 }, { "epoch": 0.5953136796930535, "grad_norm": 1.5386391878128052, "learning_rate": 3.7141483982285144e-06, "loss": 0.7813, "step": 16854 }, { "epoch": 0.5953490014967614, "grad_norm": 1.6428579092025757, "learning_rate": 3.7135956356927762e-06, "loss": 0.7791, "step": 16855 }, { "epoch": 0.5953843233004693, "grad_norm": 1.7137597799301147, "learning_rate": 3.713042889993258e-06, "loss": 0.8343, "step": 16856 }, { "epoch": 0.5954196451041772, "grad_norm": 1.6317716836929321, "learning_rate": 3.7124901611371933e-06, "loss": 0.755, "step": 16857 }, { "epoch": 0.5954549669078851, "grad_norm": 1.585335612297058, "learning_rate": 3.7119374491318133e-06, "loss": 0.8244, "step": 16858 }, { "epoch": 0.595490288711593, "grad_norm": 3.225212574005127, "learning_rate": 3.711384753984355e-06, "loss": 0.7491, "step": 16859 }, { "epoch": 0.595525610515301, "grad_norm": 1.5712255239486694, "learning_rate": 3.710832075702051e-06, "loss": 0.7391, "step": 16860 }, { "epoch": 0.5955609323190089, "grad_norm": 1.650467038154602, "learning_rate": 3.710279414292134e-06, "loss": 0.7909, "step": 16861 }, { "epoch": 0.5955962541227168, "grad_norm": 1.653132438659668, "learning_rate": 3.7097267697618378e-06, "loss": 0.7593, "step": 16862 }, { "epoch": 0.5956315759264247, "grad_norm": 1.5958772897720337, "learning_rate": 3.7091741421183957e-06, "loss": 0.7462, "step": 16863 }, { "epoch": 0.5956668977301326, "grad_norm": 1.607142448425293, "learning_rate": 3.708621531369039e-06, "loss": 0.756, "step": 16864 }, { "epoch": 0.5957022195338405, "grad_norm": 1.779395341873169, "learning_rate": 3.708068937521001e-06, "loss": 0.7679, "step": 16865 }, { "epoch": 0.5957375413375484, "grad_norm": 1.624754786491394, "learning_rate": 3.7075163605815132e-06, "loss": 0.8241, "step": 16866 }, { "epoch": 0.5957728631412563, "grad_norm": 1.689522624015808, "learning_rate": 3.706963800557809e-06, "loss": 0.7735, "step": 16867 }, { "epoch": 0.5958081849449642, "grad_norm": 1.6848382949829102, "learning_rate": 3.7064112574571202e-06, "loss": 0.779, "step": 16868 }, { "epoch": 0.5958435067486721, "grad_norm": 1.6846922636032104, "learning_rate": 3.705858731286677e-06, "loss": 0.7739, "step": 16869 }, { "epoch": 0.5958788285523801, "grad_norm": 1.6113895177841187, "learning_rate": 3.705306222053712e-06, "loss": 0.7884, "step": 16870 }, { "epoch": 0.595914150356088, "grad_norm": 1.646292805671692, "learning_rate": 3.7047537297654563e-06, "loss": 0.778, "step": 16871 }, { "epoch": 0.5959494721597959, "grad_norm": 1.5230388641357422, "learning_rate": 3.7042012544291396e-06, "loss": 0.7916, "step": 16872 }, { "epoch": 0.5959847939635038, "grad_norm": 1.6298445463180542, "learning_rate": 3.7036487960519934e-06, "loss": 0.7754, "step": 16873 }, { "epoch": 0.5960201157672117, "grad_norm": 1.979544758796692, "learning_rate": 3.703096354641247e-06, "loss": 0.7995, "step": 16874 }, { "epoch": 0.5960554375709196, "grad_norm": 1.6522574424743652, "learning_rate": 3.7025439302041333e-06, "loss": 0.7651, "step": 16875 }, { "epoch": 0.5960907593746274, "grad_norm": 1.644219994544983, "learning_rate": 3.7019915227478787e-06, "loss": 0.7597, "step": 16876 }, { "epoch": 0.5961260811783353, "grad_norm": 1.624575138092041, "learning_rate": 3.7014391322797164e-06, "loss": 0.7461, "step": 16877 }, { "epoch": 0.5961614029820432, "grad_norm": 1.6241497993469238, "learning_rate": 3.700886758806875e-06, "loss": 0.7666, "step": 16878 }, { "epoch": 0.5961967247857511, "grad_norm": 1.603618860244751, "learning_rate": 3.700334402336582e-06, "loss": 0.7338, "step": 16879 }, { "epoch": 0.596232046589459, "grad_norm": 1.6164705753326416, "learning_rate": 3.6997820628760693e-06, "loss": 0.8127, "step": 16880 }, { "epoch": 0.596267368393167, "grad_norm": 1.7606440782546997, "learning_rate": 3.699229740432565e-06, "loss": 0.7919, "step": 16881 }, { "epoch": 0.5963026901968749, "grad_norm": 1.6288955211639404, "learning_rate": 3.6986774350132957e-06, "loss": 0.7561, "step": 16882 }, { "epoch": 0.5963380120005828, "grad_norm": 1.925065279006958, "learning_rate": 3.698125146625493e-06, "loss": 0.7756, "step": 16883 }, { "epoch": 0.5963733338042907, "grad_norm": 1.7965914011001587, "learning_rate": 3.6975728752763834e-06, "loss": 0.7967, "step": 16884 }, { "epoch": 0.5964086556079986, "grad_norm": 1.7563488483428955, "learning_rate": 3.697020620973194e-06, "loss": 0.7748, "step": 16885 }, { "epoch": 0.5964439774117065, "grad_norm": 1.6348950862884521, "learning_rate": 3.696468383723155e-06, "loss": 0.7842, "step": 16886 }, { "epoch": 0.5964792992154144, "grad_norm": 1.5766675472259521, "learning_rate": 3.6959161635334926e-06, "loss": 0.7933, "step": 16887 }, { "epoch": 0.5965146210191223, "grad_norm": 1.6243726015090942, "learning_rate": 3.6953639604114335e-06, "loss": 0.8002, "step": 16888 }, { "epoch": 0.5965499428228302, "grad_norm": 1.4368398189544678, "learning_rate": 3.6948117743642076e-06, "loss": 0.747, "step": 16889 }, { "epoch": 0.5965852646265382, "grad_norm": 1.8488574028015137, "learning_rate": 3.6942596053990387e-06, "loss": 0.7829, "step": 16890 }, { "epoch": 0.5966205864302461, "grad_norm": 1.7946906089782715, "learning_rate": 3.693707453523155e-06, "loss": 0.8132, "step": 16891 }, { "epoch": 0.596655908233954, "grad_norm": 1.5291812419891357, "learning_rate": 3.6931553187437813e-06, "loss": 0.7808, "step": 16892 }, { "epoch": 0.5966912300376619, "grad_norm": 1.8096027374267578, "learning_rate": 3.692603201068146e-06, "loss": 0.8003, "step": 16893 }, { "epoch": 0.5967265518413698, "grad_norm": 1.7264652252197266, "learning_rate": 3.692051100503474e-06, "loss": 0.7863, "step": 16894 }, { "epoch": 0.5967618736450777, "grad_norm": 1.56117844581604, "learning_rate": 3.6914990170569908e-06, "loss": 0.7582, "step": 16895 }, { "epoch": 0.5967971954487856, "grad_norm": 0.9396551847457886, "learning_rate": 3.690946950735924e-06, "loss": 0.5915, "step": 16896 }, { "epoch": 0.5968325172524935, "grad_norm": 1.987201452255249, "learning_rate": 3.690394901547497e-06, "loss": 0.7857, "step": 16897 }, { "epoch": 0.5968678390562014, "grad_norm": 1.7912558317184448, "learning_rate": 3.689842869498934e-06, "loss": 0.765, "step": 16898 }, { "epoch": 0.5969031608599094, "grad_norm": 1.7766832113265991, "learning_rate": 3.6892908545974628e-06, "loss": 0.8077, "step": 16899 }, { "epoch": 0.5969384826636173, "grad_norm": 2.345280170440674, "learning_rate": 3.6887388568503045e-06, "loss": 0.7826, "step": 16900 }, { "epoch": 0.5969738044673252, "grad_norm": 1.7667747735977173, "learning_rate": 3.6881868762646876e-06, "loss": 0.7866, "step": 16901 }, { "epoch": 0.597009126271033, "grad_norm": 1.7049896717071533, "learning_rate": 3.687634912847834e-06, "loss": 0.774, "step": 16902 }, { "epoch": 0.5970444480747409, "grad_norm": 1.654313564300537, "learning_rate": 3.687082966606966e-06, "loss": 0.7769, "step": 16903 }, { "epoch": 0.5970797698784488, "grad_norm": 1.5871365070343018, "learning_rate": 3.6865310375493115e-06, "loss": 0.7524, "step": 16904 }, { "epoch": 0.5971150916821567, "grad_norm": 1.649209976196289, "learning_rate": 3.685979125682092e-06, "loss": 0.8079, "step": 16905 }, { "epoch": 0.5971504134858646, "grad_norm": 1.7849771976470947, "learning_rate": 3.685427231012529e-06, "loss": 0.7986, "step": 16906 }, { "epoch": 0.5971857352895725, "grad_norm": 1.721754550933838, "learning_rate": 3.6848753535478486e-06, "loss": 0.8336, "step": 16907 }, { "epoch": 0.5972210570932804, "grad_norm": 1.8808387517929077, "learning_rate": 3.684323493295273e-06, "loss": 0.7735, "step": 16908 }, { "epoch": 0.5972563788969883, "grad_norm": 1.763975977897644, "learning_rate": 3.6837716502620234e-06, "loss": 0.7758, "step": 16909 }, { "epoch": 0.5972917007006963, "grad_norm": 1.5167230367660522, "learning_rate": 3.6832198244553226e-06, "loss": 0.7987, "step": 16910 }, { "epoch": 0.5973270225044042, "grad_norm": 1.589401125907898, "learning_rate": 3.6826680158823936e-06, "loss": 0.7753, "step": 16911 }, { "epoch": 0.5973623443081121, "grad_norm": 1.9442015886306763, "learning_rate": 3.682116224550458e-06, "loss": 0.7968, "step": 16912 }, { "epoch": 0.59739766611182, "grad_norm": 1.54232656955719, "learning_rate": 3.681564450466737e-06, "loss": 0.7616, "step": 16913 }, { "epoch": 0.5974329879155279, "grad_norm": 1.8569893836975098, "learning_rate": 3.6810126936384525e-06, "loss": 0.812, "step": 16914 }, { "epoch": 0.5974683097192358, "grad_norm": 1.6061210632324219, "learning_rate": 3.680460954072827e-06, "loss": 0.7817, "step": 16915 }, { "epoch": 0.5975036315229437, "grad_norm": 1.6519159078598022, "learning_rate": 3.679909231777079e-06, "loss": 0.7995, "step": 16916 }, { "epoch": 0.5975389533266516, "grad_norm": 1.558543086051941, "learning_rate": 3.6793575267584326e-06, "loss": 0.7733, "step": 16917 }, { "epoch": 0.5975742751303595, "grad_norm": 1.9161009788513184, "learning_rate": 3.6788058390241056e-06, "loss": 0.7861, "step": 16918 }, { "epoch": 0.5976095969340675, "grad_norm": 1.5398298501968384, "learning_rate": 3.678254168581319e-06, "loss": 0.7862, "step": 16919 }, { "epoch": 0.5976449187377754, "grad_norm": 1.5216481685638428, "learning_rate": 3.6777025154372938e-06, "loss": 0.7775, "step": 16920 }, { "epoch": 0.5976802405414833, "grad_norm": 1.602333903312683, "learning_rate": 3.67715087959925e-06, "loss": 0.7516, "step": 16921 }, { "epoch": 0.5977155623451912, "grad_norm": 1.6576132774353027, "learning_rate": 3.676599261074406e-06, "loss": 0.7796, "step": 16922 }, { "epoch": 0.5977508841488991, "grad_norm": 2.4997825622558594, "learning_rate": 3.6760476598699823e-06, "loss": 0.7517, "step": 16923 }, { "epoch": 0.597786205952607, "grad_norm": 1.5758942365646362, "learning_rate": 3.675496075993198e-06, "loss": 0.8023, "step": 16924 }, { "epoch": 0.5978215277563149, "grad_norm": 1.5233235359191895, "learning_rate": 3.6749445094512726e-06, "loss": 0.7599, "step": 16925 }, { "epoch": 0.5978568495600228, "grad_norm": 1.790924072265625, "learning_rate": 3.674392960251425e-06, "loss": 0.8025, "step": 16926 }, { "epoch": 0.5978921713637307, "grad_norm": 2.240356206893921, "learning_rate": 3.673841428400873e-06, "loss": 0.7826, "step": 16927 }, { "epoch": 0.5979274931674387, "grad_norm": 1.626466989517212, "learning_rate": 3.6732899139068345e-06, "loss": 0.7716, "step": 16928 }, { "epoch": 0.5979628149711465, "grad_norm": 1.7143135070800781, "learning_rate": 3.672738416776527e-06, "loss": 0.799, "step": 16929 }, { "epoch": 0.5979981367748544, "grad_norm": 1.5365830659866333, "learning_rate": 3.6721869370171707e-06, "loss": 0.7653, "step": 16930 }, { "epoch": 0.5980334585785623, "grad_norm": 1.7160240411758423, "learning_rate": 3.6716354746359827e-06, "loss": 0.8269, "step": 16931 }, { "epoch": 0.5980687803822702, "grad_norm": 1.591365933418274, "learning_rate": 3.6710840296401785e-06, "loss": 0.7649, "step": 16932 }, { "epoch": 0.5981041021859781, "grad_norm": 1.5113271474838257, "learning_rate": 3.6705326020369782e-06, "loss": 0.762, "step": 16933 }, { "epoch": 0.598139423989686, "grad_norm": 1.7551100254058838, "learning_rate": 3.6699811918335953e-06, "loss": 0.8014, "step": 16934 }, { "epoch": 0.5981747457933939, "grad_norm": 1.6702920198440552, "learning_rate": 3.6694297990372506e-06, "loss": 0.7904, "step": 16935 }, { "epoch": 0.5982100675971018, "grad_norm": 0.9080132842063904, "learning_rate": 3.668878423655158e-06, "loss": 0.5765, "step": 16936 }, { "epoch": 0.5982453894008097, "grad_norm": 1.6793395280838013, "learning_rate": 3.6683270656945335e-06, "loss": 0.7771, "step": 16937 }, { "epoch": 0.5982807112045176, "grad_norm": 1.7381430864334106, "learning_rate": 3.6677757251625945e-06, "loss": 0.7673, "step": 16938 }, { "epoch": 0.5983160330082256, "grad_norm": 1.7110515832901, "learning_rate": 3.667224402066557e-06, "loss": 0.8017, "step": 16939 }, { "epoch": 0.5983513548119335, "grad_norm": 1.575535774230957, "learning_rate": 3.6666730964136348e-06, "loss": 0.761, "step": 16940 }, { "epoch": 0.5983866766156414, "grad_norm": 1.9431371688842773, "learning_rate": 3.666121808211046e-06, "loss": 0.8018, "step": 16941 }, { "epoch": 0.5984219984193493, "grad_norm": 1.8381426334381104, "learning_rate": 3.6655705374660035e-06, "loss": 0.7727, "step": 16942 }, { "epoch": 0.5984573202230572, "grad_norm": 1.7768155336380005, "learning_rate": 3.6650192841857223e-06, "loss": 0.7721, "step": 16943 }, { "epoch": 0.5984926420267651, "grad_norm": 1.6330369710922241, "learning_rate": 3.6644680483774188e-06, "loss": 0.7759, "step": 16944 }, { "epoch": 0.598527963830473, "grad_norm": 1.8326570987701416, "learning_rate": 3.6639168300483076e-06, "loss": 0.797, "step": 16945 }, { "epoch": 0.5985632856341809, "grad_norm": 1.8116226196289062, "learning_rate": 3.6633656292056004e-06, "loss": 0.7594, "step": 16946 }, { "epoch": 0.5985986074378888, "grad_norm": 1.843843936920166, "learning_rate": 3.6628144458565118e-06, "loss": 0.7934, "step": 16947 }, { "epoch": 0.5986339292415968, "grad_norm": 1.795526146888733, "learning_rate": 3.6622632800082567e-06, "loss": 0.8005, "step": 16948 }, { "epoch": 0.5986692510453047, "grad_norm": 1.6942217350006104, "learning_rate": 3.6617121316680495e-06, "loss": 0.7993, "step": 16949 }, { "epoch": 0.5987045728490126, "grad_norm": 1.5629584789276123, "learning_rate": 3.6611610008431004e-06, "loss": 0.78, "step": 16950 }, { "epoch": 0.5987398946527205, "grad_norm": 1.7890043258666992, "learning_rate": 3.660609887540626e-06, "loss": 0.7729, "step": 16951 }, { "epoch": 0.5987752164564284, "grad_norm": 1.768512487411499, "learning_rate": 3.660058791767837e-06, "loss": 0.7766, "step": 16952 }, { "epoch": 0.5988105382601363, "grad_norm": 1.7177364826202393, "learning_rate": 3.6595077135319462e-06, "loss": 0.7766, "step": 16953 }, { "epoch": 0.5988458600638442, "grad_norm": 1.7691506147384644, "learning_rate": 3.6589566528401677e-06, "loss": 0.8012, "step": 16954 }, { "epoch": 0.598881181867552, "grad_norm": 1.6824240684509277, "learning_rate": 3.658405609699712e-06, "loss": 0.8007, "step": 16955 }, { "epoch": 0.5989165036712599, "grad_norm": 1.8419052362442017, "learning_rate": 3.6578545841177903e-06, "loss": 0.7725, "step": 16956 }, { "epoch": 0.5989518254749678, "grad_norm": 1.697943925857544, "learning_rate": 3.6573035761016164e-06, "loss": 0.7807, "step": 16957 }, { "epoch": 0.5989871472786757, "grad_norm": 1.5463916063308716, "learning_rate": 3.6567525856584005e-06, "loss": 0.7748, "step": 16958 }, { "epoch": 0.5990224690823837, "grad_norm": 1.7783358097076416, "learning_rate": 3.6562016127953548e-06, "loss": 0.82, "step": 16959 }, { "epoch": 0.5990577908860916, "grad_norm": 2.3767480850219727, "learning_rate": 3.6556506575196903e-06, "loss": 0.7814, "step": 16960 }, { "epoch": 0.5990931126897995, "grad_norm": 1.8348867893218994, "learning_rate": 3.655099719838616e-06, "loss": 0.8051, "step": 16961 }, { "epoch": 0.5991284344935074, "grad_norm": 1.8946492671966553, "learning_rate": 3.6545487997593443e-06, "loss": 0.82, "step": 16962 }, { "epoch": 0.5991637562972153, "grad_norm": 1.51949143409729, "learning_rate": 3.653997897289087e-06, "loss": 0.783, "step": 16963 }, { "epoch": 0.5991990781009232, "grad_norm": 1.480607271194458, "learning_rate": 3.65344701243505e-06, "loss": 0.784, "step": 16964 }, { "epoch": 0.5992343999046311, "grad_norm": 1.5760999917984009, "learning_rate": 3.6528961452044463e-06, "loss": 0.7869, "step": 16965 }, { "epoch": 0.599269721708339, "grad_norm": 1.632332682609558, "learning_rate": 3.652345295604483e-06, "loss": 0.7999, "step": 16966 }, { "epoch": 0.5993050435120469, "grad_norm": 1.6384966373443604, "learning_rate": 3.651794463642373e-06, "loss": 0.783, "step": 16967 }, { "epoch": 0.5993403653157549, "grad_norm": 1.662499189376831, "learning_rate": 3.6512436493253218e-06, "loss": 0.7938, "step": 16968 }, { "epoch": 0.5993756871194628, "grad_norm": 1.745595097541809, "learning_rate": 3.6506928526605412e-06, "loss": 0.7703, "step": 16969 }, { "epoch": 0.5994110089231707, "grad_norm": 1.583825707435608, "learning_rate": 3.6501420736552393e-06, "loss": 0.784, "step": 16970 }, { "epoch": 0.5994463307268786, "grad_norm": 1.6801725625991821, "learning_rate": 3.6495913123166226e-06, "loss": 0.7839, "step": 16971 }, { "epoch": 0.5994816525305865, "grad_norm": 1.6700950860977173, "learning_rate": 3.6490405686519027e-06, "loss": 0.7952, "step": 16972 }, { "epoch": 0.5995169743342944, "grad_norm": 1.6916882991790771, "learning_rate": 3.648489842668286e-06, "loss": 0.8176, "step": 16973 }, { "epoch": 0.5995522961380023, "grad_norm": 1.772223949432373, "learning_rate": 3.647939134372979e-06, "loss": 0.7703, "step": 16974 }, { "epoch": 0.5995876179417102, "grad_norm": 3.4539873600006104, "learning_rate": 3.647388443773191e-06, "loss": 0.7668, "step": 16975 }, { "epoch": 0.5996229397454181, "grad_norm": 1.6673500537872314, "learning_rate": 3.646837770876129e-06, "loss": 0.7885, "step": 16976 }, { "epoch": 0.599658261549126, "grad_norm": 1.5764939785003662, "learning_rate": 3.646287115688999e-06, "loss": 0.7806, "step": 16977 }, { "epoch": 0.599693583352834, "grad_norm": 1.5480109453201294, "learning_rate": 3.645736478219009e-06, "loss": 0.7949, "step": 16978 }, { "epoch": 0.5997289051565419, "grad_norm": 1.971132516860962, "learning_rate": 3.645185858473366e-06, "loss": 0.7948, "step": 16979 }, { "epoch": 0.5997642269602498, "grad_norm": 1.9768294095993042, "learning_rate": 3.644635256459275e-06, "loss": 0.786, "step": 16980 }, { "epoch": 0.5997995487639576, "grad_norm": 1.6918491125106812, "learning_rate": 3.644084672183944e-06, "loss": 0.7689, "step": 16981 }, { "epoch": 0.5998348705676655, "grad_norm": 1.687572717666626, "learning_rate": 3.6435341056545793e-06, "loss": 0.793, "step": 16982 }, { "epoch": 0.5998701923713734, "grad_norm": 1.6249518394470215, "learning_rate": 3.6429835568783835e-06, "loss": 0.7946, "step": 16983 }, { "epoch": 0.5999055141750813, "grad_norm": 4.480093479156494, "learning_rate": 3.6424330258625628e-06, "loss": 0.8112, "step": 16984 }, { "epoch": 0.5999408359787892, "grad_norm": 1.7676454782485962, "learning_rate": 3.641882512614324e-06, "loss": 0.7987, "step": 16985 }, { "epoch": 0.5999761577824971, "grad_norm": 1.5726269483566284, "learning_rate": 3.6413320171408728e-06, "loss": 0.7995, "step": 16986 }, { "epoch": 0.600011479586205, "grad_norm": 1.6910948753356934, "learning_rate": 3.640781539449411e-06, "loss": 0.7956, "step": 16987 }, { "epoch": 0.600046801389913, "grad_norm": 1.6327311992645264, "learning_rate": 3.640231079547146e-06, "loss": 0.7535, "step": 16988 }, { "epoch": 0.6000821231936209, "grad_norm": 1.5047329664230347, "learning_rate": 3.6396806374412817e-06, "loss": 0.8016, "step": 16989 }, { "epoch": 0.6001174449973288, "grad_norm": 1.7601462602615356, "learning_rate": 3.6391302131390195e-06, "loss": 0.7921, "step": 16990 }, { "epoch": 0.6001527668010367, "grad_norm": 1.8165723085403442, "learning_rate": 3.638579806647567e-06, "loss": 0.7766, "step": 16991 }, { "epoch": 0.6001880886047446, "grad_norm": 1.629328727722168, "learning_rate": 3.6380294179741248e-06, "loss": 0.7474, "step": 16992 }, { "epoch": 0.6002234104084525, "grad_norm": 1.7716388702392578, "learning_rate": 3.6374790471258986e-06, "loss": 0.7698, "step": 16993 }, { "epoch": 0.6002587322121604, "grad_norm": 1.896381139755249, "learning_rate": 3.63692869411009e-06, "loss": 0.7813, "step": 16994 }, { "epoch": 0.6002940540158683, "grad_norm": 1.6448931694030762, "learning_rate": 3.6363783589339018e-06, "loss": 0.8014, "step": 16995 }, { "epoch": 0.6003293758195762, "grad_norm": 1.5611990690231323, "learning_rate": 3.635828041604539e-06, "loss": 0.7996, "step": 16996 }, { "epoch": 0.6003646976232841, "grad_norm": 1.719233751296997, "learning_rate": 3.635277742129202e-06, "loss": 0.8016, "step": 16997 }, { "epoch": 0.6004000194269921, "grad_norm": 1.6620720624923706, "learning_rate": 3.6347274605150915e-06, "loss": 0.7969, "step": 16998 }, { "epoch": 0.6004353412307, "grad_norm": 1.6685988903045654, "learning_rate": 3.6341771967694127e-06, "loss": 0.762, "step": 16999 }, { "epoch": 0.6004706630344079, "grad_norm": 2.0812299251556396, "learning_rate": 3.6336269508993677e-06, "loss": 0.7461, "step": 17000 }, { "epoch": 0.6005059848381158, "grad_norm": 1.7743042707443237, "learning_rate": 3.633076722912155e-06, "loss": 0.865, "step": 17001 }, { "epoch": 0.6005413066418237, "grad_norm": 1.5842605829238892, "learning_rate": 3.6325265128149757e-06, "loss": 0.787, "step": 17002 }, { "epoch": 0.6005766284455316, "grad_norm": 1.715927243232727, "learning_rate": 3.6319763206150338e-06, "loss": 0.7813, "step": 17003 }, { "epoch": 0.6006119502492395, "grad_norm": 1.7596501111984253, "learning_rate": 3.6314261463195286e-06, "loss": 0.7706, "step": 17004 }, { "epoch": 0.6006472720529474, "grad_norm": 1.7672606706619263, "learning_rate": 3.6308759899356596e-06, "loss": 0.7759, "step": 17005 }, { "epoch": 0.6006825938566553, "grad_norm": 1.7655586004257202, "learning_rate": 3.6303258514706298e-06, "loss": 0.764, "step": 17006 }, { "epoch": 0.6007179156603631, "grad_norm": 1.9140437841415405, "learning_rate": 3.6297757309316374e-06, "loss": 0.7906, "step": 17007 }, { "epoch": 0.600753237464071, "grad_norm": 1.600367546081543, "learning_rate": 3.629225628325881e-06, "loss": 0.7575, "step": 17008 }, { "epoch": 0.600788559267779, "grad_norm": 1.868778944015503, "learning_rate": 3.6286755436605637e-06, "loss": 0.7801, "step": 17009 }, { "epoch": 0.6008238810714869, "grad_norm": 1.6666756868362427, "learning_rate": 3.628125476942883e-06, "loss": 0.7818, "step": 17010 }, { "epoch": 0.6008592028751948, "grad_norm": 1.6697638034820557, "learning_rate": 3.627575428180037e-06, "loss": 0.7682, "step": 17011 }, { "epoch": 0.6008945246789027, "grad_norm": 1.6261247396469116, "learning_rate": 3.6270253973792267e-06, "loss": 0.7946, "step": 17012 }, { "epoch": 0.6009298464826106, "grad_norm": 1.8002763986587524, "learning_rate": 3.626475384547651e-06, "loss": 0.7806, "step": 17013 }, { "epoch": 0.6009651682863185, "grad_norm": 1.8336613178253174, "learning_rate": 3.625925389692505e-06, "loss": 0.7772, "step": 17014 }, { "epoch": 0.6010004900900264, "grad_norm": 1.8085532188415527, "learning_rate": 3.6253754128209907e-06, "loss": 0.7566, "step": 17015 }, { "epoch": 0.6010358118937343, "grad_norm": 1.7285487651824951, "learning_rate": 3.6248254539403026e-06, "loss": 0.7734, "step": 17016 }, { "epoch": 0.6010711336974423, "grad_norm": 0.9425053596496582, "learning_rate": 3.624275513057643e-06, "loss": 0.5634, "step": 17017 }, { "epoch": 0.6011064555011502, "grad_norm": 1.5438027381896973, "learning_rate": 3.623725590180206e-06, "loss": 0.759, "step": 17018 }, { "epoch": 0.6011417773048581, "grad_norm": 1.6838454008102417, "learning_rate": 3.6231756853151912e-06, "loss": 0.7955, "step": 17019 }, { "epoch": 0.601177099108566, "grad_norm": 2.2332069873809814, "learning_rate": 3.6226257984697933e-06, "loss": 0.785, "step": 17020 }, { "epoch": 0.6012124209122739, "grad_norm": 1.6663702726364136, "learning_rate": 3.6220759296512085e-06, "loss": 0.7798, "step": 17021 }, { "epoch": 0.6012477427159818, "grad_norm": 2.8992345333099365, "learning_rate": 3.621526078866636e-06, "loss": 0.8019, "step": 17022 }, { "epoch": 0.6012830645196897, "grad_norm": 1.8407188653945923, "learning_rate": 3.620976246123271e-06, "loss": 0.7612, "step": 17023 }, { "epoch": 0.6013183863233976, "grad_norm": 1.525675892829895, "learning_rate": 3.620426431428309e-06, "loss": 0.7722, "step": 17024 }, { "epoch": 0.6013537081271055, "grad_norm": 1.806743860244751, "learning_rate": 3.6198766347889477e-06, "loss": 0.8086, "step": 17025 }, { "epoch": 0.6013890299308134, "grad_norm": 1.9096699953079224, "learning_rate": 3.6193268562123794e-06, "loss": 0.7814, "step": 17026 }, { "epoch": 0.6014243517345214, "grad_norm": 1.8052600622177124, "learning_rate": 3.618777095705804e-06, "loss": 0.7369, "step": 17027 }, { "epoch": 0.6014596735382293, "grad_norm": 1.663930058479309, "learning_rate": 3.618227353276414e-06, "loss": 0.7857, "step": 17028 }, { "epoch": 0.6014949953419372, "grad_norm": 1.5680351257324219, "learning_rate": 3.617677628931403e-06, "loss": 0.7492, "step": 17029 }, { "epoch": 0.6015303171456451, "grad_norm": 1.6863501071929932, "learning_rate": 3.6171279226779693e-06, "loss": 0.8037, "step": 17030 }, { "epoch": 0.601565638949353, "grad_norm": 1.8110911846160889, "learning_rate": 3.616578234523305e-06, "loss": 0.7609, "step": 17031 }, { "epoch": 0.6016009607530609, "grad_norm": 1.6824991703033447, "learning_rate": 3.6160285644746036e-06, "loss": 0.7939, "step": 17032 }, { "epoch": 0.6016362825567687, "grad_norm": 1.557906150817871, "learning_rate": 3.615478912539061e-06, "loss": 0.7634, "step": 17033 }, { "epoch": 0.6016716043604766, "grad_norm": 1.6888607740402222, "learning_rate": 3.6149292787238706e-06, "loss": 0.7615, "step": 17034 }, { "epoch": 0.6017069261641845, "grad_norm": 1.8603445291519165, "learning_rate": 3.614379663036224e-06, "loss": 0.8133, "step": 17035 }, { "epoch": 0.6017422479678924, "grad_norm": 1.6196622848510742, "learning_rate": 3.613830065483317e-06, "loss": 0.7578, "step": 17036 }, { "epoch": 0.6017775697716004, "grad_norm": 1.7663445472717285, "learning_rate": 3.613280486072343e-06, "loss": 0.799, "step": 17037 }, { "epoch": 0.6018128915753083, "grad_norm": 1.607869267463684, "learning_rate": 3.612730924810492e-06, "loss": 0.7776, "step": 17038 }, { "epoch": 0.6018482133790162, "grad_norm": 1.5716286897659302, "learning_rate": 3.6121813817049567e-06, "loss": 0.7451, "step": 17039 }, { "epoch": 0.6018835351827241, "grad_norm": 1.6534743309020996, "learning_rate": 3.6116318567629317e-06, "loss": 0.7545, "step": 17040 }, { "epoch": 0.601918856986432, "grad_norm": 1.5840351581573486, "learning_rate": 3.611082349991608e-06, "loss": 0.7883, "step": 17041 }, { "epoch": 0.6019541787901399, "grad_norm": 1.581735610961914, "learning_rate": 3.6105328613981767e-06, "loss": 0.7642, "step": 17042 }, { "epoch": 0.6019895005938478, "grad_norm": 1.7125612497329712, "learning_rate": 3.6099833909898307e-06, "loss": 0.8095, "step": 17043 }, { "epoch": 0.6020248223975557, "grad_norm": 1.512192964553833, "learning_rate": 3.609433938773761e-06, "loss": 0.779, "step": 17044 }, { "epoch": 0.6020601442012636, "grad_norm": 1.7463499307632446, "learning_rate": 3.6088845047571577e-06, "loss": 0.7601, "step": 17045 }, { "epoch": 0.6020954660049715, "grad_norm": 1.6196861267089844, "learning_rate": 3.608335088947213e-06, "loss": 0.7848, "step": 17046 }, { "epoch": 0.6021307878086795, "grad_norm": 1.7768973112106323, "learning_rate": 3.607785691351117e-06, "loss": 0.7662, "step": 17047 }, { "epoch": 0.6021661096123874, "grad_norm": 0.8826690912246704, "learning_rate": 3.60723631197606e-06, "loss": 0.5781, "step": 17048 }, { "epoch": 0.6022014314160953, "grad_norm": 1.650357723236084, "learning_rate": 3.6066869508292326e-06, "loss": 0.7988, "step": 17049 }, { "epoch": 0.6022367532198032, "grad_norm": 1.59717857837677, "learning_rate": 3.6061376079178235e-06, "loss": 0.765, "step": 17050 }, { "epoch": 0.6022720750235111, "grad_norm": 1.6083916425704956, "learning_rate": 3.6055882832490245e-06, "loss": 0.7775, "step": 17051 }, { "epoch": 0.602307396827219, "grad_norm": 1.6604012250900269, "learning_rate": 3.6050389768300242e-06, "loss": 0.7825, "step": 17052 }, { "epoch": 0.6023427186309269, "grad_norm": 1.8380939960479736, "learning_rate": 3.6044896886680102e-06, "loss": 0.811, "step": 17053 }, { "epoch": 0.6023780404346348, "grad_norm": 1.582306981086731, "learning_rate": 3.6039404187701742e-06, "loss": 0.7532, "step": 17054 }, { "epoch": 0.6024133622383427, "grad_norm": 1.6644397974014282, "learning_rate": 3.6033911671437033e-06, "loss": 0.7652, "step": 17055 }, { "epoch": 0.6024486840420507, "grad_norm": 1.653779149055481, "learning_rate": 3.6028419337957875e-06, "loss": 0.8156, "step": 17056 }, { "epoch": 0.6024840058457586, "grad_norm": 1.7623465061187744, "learning_rate": 3.602292718733612e-06, "loss": 0.7842, "step": 17057 }, { "epoch": 0.6025193276494665, "grad_norm": 1.8191688060760498, "learning_rate": 3.6017435219643665e-06, "loss": 0.7968, "step": 17058 }, { "epoch": 0.6025546494531743, "grad_norm": 1.8658579587936401, "learning_rate": 3.60119434349524e-06, "loss": 0.8528, "step": 17059 }, { "epoch": 0.6025899712568822, "grad_norm": 1.8805984258651733, "learning_rate": 3.6006451833334176e-06, "loss": 0.8069, "step": 17060 }, { "epoch": 0.6026252930605901, "grad_norm": 1.6569575071334839, "learning_rate": 3.6000960414860893e-06, "loss": 0.7645, "step": 17061 }, { "epoch": 0.602660614864298, "grad_norm": 1.681671142578125, "learning_rate": 3.5995469179604403e-06, "loss": 0.7708, "step": 17062 }, { "epoch": 0.6026959366680059, "grad_norm": 2.5197768211364746, "learning_rate": 3.5989978127636572e-06, "loss": 0.7597, "step": 17063 }, { "epoch": 0.6027312584717138, "grad_norm": 1.9527027606964111, "learning_rate": 3.598448725902929e-06, "loss": 0.7815, "step": 17064 }, { "epoch": 0.6027665802754217, "grad_norm": 1.6828491687774658, "learning_rate": 3.5978996573854396e-06, "loss": 0.8091, "step": 17065 }, { "epoch": 0.6028019020791296, "grad_norm": 1.5489305257797241, "learning_rate": 3.597350607218375e-06, "loss": 0.8082, "step": 17066 }, { "epoch": 0.6028372238828376, "grad_norm": 1.5514609813690186, "learning_rate": 3.5968015754089237e-06, "loss": 0.7816, "step": 17067 }, { "epoch": 0.6028725456865455, "grad_norm": 1.6666135787963867, "learning_rate": 3.5962525619642686e-06, "loss": 0.7496, "step": 17068 }, { "epoch": 0.6029078674902534, "grad_norm": 2.0420212745666504, "learning_rate": 3.5957035668915964e-06, "loss": 0.7632, "step": 17069 }, { "epoch": 0.6029431892939613, "grad_norm": 1.6632999181747437, "learning_rate": 3.5951545901980915e-06, "loss": 0.7612, "step": 17070 }, { "epoch": 0.6029785110976692, "grad_norm": 1.5898566246032715, "learning_rate": 3.5946056318909402e-06, "loss": 0.7998, "step": 17071 }, { "epoch": 0.6030138329013771, "grad_norm": 2.214991331100464, "learning_rate": 3.5940566919773246e-06, "loss": 0.7801, "step": 17072 }, { "epoch": 0.603049154705085, "grad_norm": 1.5108000040054321, "learning_rate": 3.5935077704644324e-06, "loss": 0.7565, "step": 17073 }, { "epoch": 0.6030844765087929, "grad_norm": 1.919985294342041, "learning_rate": 3.592958867359446e-06, "loss": 0.8082, "step": 17074 }, { "epoch": 0.6031197983125008, "grad_norm": 1.6682566404342651, "learning_rate": 3.59240998266955e-06, "loss": 0.7794, "step": 17075 }, { "epoch": 0.6031551201162088, "grad_norm": 1.7309679985046387, "learning_rate": 3.591861116401926e-06, "loss": 0.7694, "step": 17076 }, { "epoch": 0.6031904419199167, "grad_norm": 2.4032416343688965, "learning_rate": 3.591312268563759e-06, "loss": 0.8001, "step": 17077 }, { "epoch": 0.6032257637236246, "grad_norm": 1.7843692302703857, "learning_rate": 3.590763439162233e-06, "loss": 0.775, "step": 17078 }, { "epoch": 0.6032610855273325, "grad_norm": 2.286940813064575, "learning_rate": 3.5902146282045293e-06, "loss": 0.8103, "step": 17079 }, { "epoch": 0.6032964073310404, "grad_norm": 1.5303782224655151, "learning_rate": 3.5896658356978327e-06, "loss": 0.7585, "step": 17080 }, { "epoch": 0.6033317291347483, "grad_norm": 2.462721824645996, "learning_rate": 3.5891170616493245e-06, "loss": 0.8368, "step": 17081 }, { "epoch": 0.6033670509384562, "grad_norm": 1.595204472541809, "learning_rate": 3.5885683060661853e-06, "loss": 0.7628, "step": 17082 }, { "epoch": 0.6034023727421641, "grad_norm": 1.7142610549926758, "learning_rate": 3.5880195689556e-06, "loss": 0.7853, "step": 17083 }, { "epoch": 0.603437694545872, "grad_norm": 1.814205527305603, "learning_rate": 3.5874708503247485e-06, "loss": 0.7588, "step": 17084 }, { "epoch": 0.6034730163495798, "grad_norm": 1.736182451248169, "learning_rate": 3.586922150180814e-06, "loss": 0.7663, "step": 17085 }, { "epoch": 0.6035083381532877, "grad_norm": 1.7394134998321533, "learning_rate": 3.5863734685309766e-06, "loss": 0.7985, "step": 17086 }, { "epoch": 0.6035436599569957, "grad_norm": 1.6866000890731812, "learning_rate": 3.5858248053824164e-06, "loss": 0.7853, "step": 17087 }, { "epoch": 0.6035789817607036, "grad_norm": 1.6304186582565308, "learning_rate": 3.5852761607423165e-06, "loss": 0.7616, "step": 17088 }, { "epoch": 0.6036143035644115, "grad_norm": 1.7021974325180054, "learning_rate": 3.5847275346178562e-06, "loss": 0.7778, "step": 17089 }, { "epoch": 0.6036496253681194, "grad_norm": 1.8714853525161743, "learning_rate": 3.5841789270162146e-06, "loss": 0.823, "step": 17090 }, { "epoch": 0.6036849471718273, "grad_norm": 2.2073073387145996, "learning_rate": 3.583630337944575e-06, "loss": 0.7394, "step": 17091 }, { "epoch": 0.6037202689755352, "grad_norm": 1.5378625392913818, "learning_rate": 3.5830817674101144e-06, "loss": 0.7525, "step": 17092 }, { "epoch": 0.6037555907792431, "grad_norm": 1.6276946067810059, "learning_rate": 3.5825332154200143e-06, "loss": 0.811, "step": 17093 }, { "epoch": 0.603790912582951, "grad_norm": 1.784685492515564, "learning_rate": 3.581984681981451e-06, "loss": 0.7775, "step": 17094 }, { "epoch": 0.6038262343866589, "grad_norm": 1.6011090278625488, "learning_rate": 3.5814361671016073e-06, "loss": 0.7551, "step": 17095 }, { "epoch": 0.6038615561903669, "grad_norm": 1.8098564147949219, "learning_rate": 3.5808876707876596e-06, "loss": 0.7771, "step": 17096 }, { "epoch": 0.6038968779940748, "grad_norm": 1.7575442790985107, "learning_rate": 3.5803391930467867e-06, "loss": 0.7986, "step": 17097 }, { "epoch": 0.6039321997977827, "grad_norm": 1.5301107168197632, "learning_rate": 3.5797907338861686e-06, "loss": 0.7976, "step": 17098 }, { "epoch": 0.6039675216014906, "grad_norm": 1.4777922630310059, "learning_rate": 3.579242293312982e-06, "loss": 0.7646, "step": 17099 }, { "epoch": 0.6040028434051985, "grad_norm": 1.5377545356750488, "learning_rate": 3.578693871334405e-06, "loss": 0.7847, "step": 17100 }, { "epoch": 0.6040381652089064, "grad_norm": 1.7516593933105469, "learning_rate": 3.5781454679576156e-06, "loss": 0.7955, "step": 17101 }, { "epoch": 0.6040734870126143, "grad_norm": 1.6704953908920288, "learning_rate": 3.5775970831897917e-06, "loss": 0.8011, "step": 17102 }, { "epoch": 0.6041088088163222, "grad_norm": 1.8284045457839966, "learning_rate": 3.5770487170381087e-06, "loss": 0.7709, "step": 17103 }, { "epoch": 0.6041441306200301, "grad_norm": 1.9290426969528198, "learning_rate": 3.576500369509745e-06, "loss": 0.763, "step": 17104 }, { "epoch": 0.604179452423738, "grad_norm": 1.68405282497406, "learning_rate": 3.5759520406118774e-06, "loss": 0.7979, "step": 17105 }, { "epoch": 0.604214774227446, "grad_norm": 1.6011359691619873, "learning_rate": 3.5754037303516808e-06, "loss": 0.7794, "step": 17106 }, { "epoch": 0.6042500960311539, "grad_norm": 1.571621060371399, "learning_rate": 3.574855438736333e-06, "loss": 0.7682, "step": 17107 }, { "epoch": 0.6042854178348618, "grad_norm": 1.6731956005096436, "learning_rate": 3.574307165773009e-06, "loss": 0.7607, "step": 17108 }, { "epoch": 0.6043207396385697, "grad_norm": 1.7250642776489258, "learning_rate": 3.5737589114688854e-06, "loss": 0.7775, "step": 17109 }, { "epoch": 0.6043560614422776, "grad_norm": 1.577377200126648, "learning_rate": 3.573210675831137e-06, "loss": 0.7663, "step": 17110 }, { "epoch": 0.6043913832459854, "grad_norm": 1.4824410676956177, "learning_rate": 3.57266245886694e-06, "loss": 0.7532, "step": 17111 }, { "epoch": 0.6044267050496933, "grad_norm": 1.8295888900756836, "learning_rate": 3.5721142605834673e-06, "loss": 0.8057, "step": 17112 }, { "epoch": 0.6044620268534012, "grad_norm": 1.629310965538025, "learning_rate": 3.5715660809878937e-06, "loss": 0.7457, "step": 17113 }, { "epoch": 0.6044973486571091, "grad_norm": 1.6270767450332642, "learning_rate": 3.5710179200873964e-06, "loss": 0.7563, "step": 17114 }, { "epoch": 0.604532670460817, "grad_norm": 1.6819844245910645, "learning_rate": 3.5704697778891473e-06, "loss": 0.7773, "step": 17115 }, { "epoch": 0.604567992264525, "grad_norm": 1.9170894622802734, "learning_rate": 3.56992165440032e-06, "loss": 0.7764, "step": 17116 }, { "epoch": 0.6046033140682329, "grad_norm": 1.7846908569335938, "learning_rate": 3.569373549628091e-06, "loss": 0.7858, "step": 17117 }, { "epoch": 0.6046386358719408, "grad_norm": 1.7074272632598877, "learning_rate": 3.5688254635796303e-06, "loss": 0.761, "step": 17118 }, { "epoch": 0.6046739576756487, "grad_norm": 1.8240128755569458, "learning_rate": 3.5682773962621143e-06, "loss": 0.764, "step": 17119 }, { "epoch": 0.6047092794793566, "grad_norm": 1.6349016427993774, "learning_rate": 3.5677293476827145e-06, "loss": 0.7579, "step": 17120 }, { "epoch": 0.6047446012830645, "grad_norm": 1.6567376852035522, "learning_rate": 3.5671813178486027e-06, "loss": 0.7756, "step": 17121 }, { "epoch": 0.6047799230867724, "grad_norm": 2.241865634918213, "learning_rate": 3.5666333067669534e-06, "loss": 0.7767, "step": 17122 }, { "epoch": 0.6048152448904803, "grad_norm": 1.6929363012313843, "learning_rate": 3.5660853144449383e-06, "loss": 0.7796, "step": 17123 }, { "epoch": 0.6048505666941882, "grad_norm": 1.6147884130477905, "learning_rate": 3.5655373408897276e-06, "loss": 0.8046, "step": 17124 }, { "epoch": 0.6048858884978962, "grad_norm": 1.5153088569641113, "learning_rate": 3.564989386108496e-06, "loss": 0.7692, "step": 17125 }, { "epoch": 0.6049212103016041, "grad_norm": 1.6341005563735962, "learning_rate": 3.5644414501084134e-06, "loss": 0.7913, "step": 17126 }, { "epoch": 0.604956532105312, "grad_norm": 1.6184672117233276, "learning_rate": 3.5638935328966497e-06, "loss": 0.7817, "step": 17127 }, { "epoch": 0.6049918539090199, "grad_norm": 1.5817629098892212, "learning_rate": 3.563345634480379e-06, "loss": 0.7886, "step": 17128 }, { "epoch": 0.6050271757127278, "grad_norm": 1.6287826299667358, "learning_rate": 3.5627977548667715e-06, "loss": 0.8043, "step": 17129 }, { "epoch": 0.6050624975164357, "grad_norm": 1.6275219917297363, "learning_rate": 3.562249894062996e-06, "loss": 0.7399, "step": 17130 }, { "epoch": 0.6050978193201436, "grad_norm": 1.6964079141616821, "learning_rate": 3.5617020520762223e-06, "loss": 0.7283, "step": 17131 }, { "epoch": 0.6051331411238515, "grad_norm": 1.8686678409576416, "learning_rate": 3.561154228913623e-06, "loss": 0.7514, "step": 17132 }, { "epoch": 0.6051684629275594, "grad_norm": 2.1137917041778564, "learning_rate": 3.5606064245823668e-06, "loss": 0.8318, "step": 17133 }, { "epoch": 0.6052037847312673, "grad_norm": 1.8458564281463623, "learning_rate": 3.560058639089622e-06, "loss": 0.829, "step": 17134 }, { "epoch": 0.6052391065349753, "grad_norm": 1.8030816316604614, "learning_rate": 3.55951087244256e-06, "loss": 0.8124, "step": 17135 }, { "epoch": 0.6052744283386832, "grad_norm": 1.7692667245864868, "learning_rate": 3.5589631246483493e-06, "loss": 0.8003, "step": 17136 }, { "epoch": 0.605309750142391, "grad_norm": 1.7341359853744507, "learning_rate": 3.5584153957141565e-06, "loss": 0.7775, "step": 17137 }, { "epoch": 0.6053450719460989, "grad_norm": 1.5657895803451538, "learning_rate": 3.5578676856471533e-06, "loss": 0.7822, "step": 17138 }, { "epoch": 0.6053803937498068, "grad_norm": 1.6786785125732422, "learning_rate": 3.5573199944545066e-06, "loss": 0.7643, "step": 17139 }, { "epoch": 0.6054157155535147, "grad_norm": 1.5487301349639893, "learning_rate": 3.556772322143384e-06, "loss": 0.746, "step": 17140 }, { "epoch": 0.6054510373572226, "grad_norm": 1.8807036876678467, "learning_rate": 3.556224668720955e-06, "loss": 0.813, "step": 17141 }, { "epoch": 0.6054863591609305, "grad_norm": 1.749980092048645, "learning_rate": 3.5556770341943848e-06, "loss": 0.7872, "step": 17142 }, { "epoch": 0.6055216809646384, "grad_norm": 1.5889796018600464, "learning_rate": 3.5551294185708433e-06, "loss": 0.7701, "step": 17143 }, { "epoch": 0.6055570027683463, "grad_norm": 1.6687885522842407, "learning_rate": 3.5545818218574966e-06, "loss": 0.7695, "step": 17144 }, { "epoch": 0.6055923245720543, "grad_norm": 1.7646749019622803, "learning_rate": 3.55403424406151e-06, "loss": 0.8024, "step": 17145 }, { "epoch": 0.6056276463757622, "grad_norm": 1.6217372417449951, "learning_rate": 3.553486685190053e-06, "loss": 0.7949, "step": 17146 }, { "epoch": 0.6056629681794701, "grad_norm": 1.5091054439544678, "learning_rate": 3.5529391452502893e-06, "loss": 0.7658, "step": 17147 }, { "epoch": 0.605698289983178, "grad_norm": 1.8227142095565796, "learning_rate": 3.552391624249388e-06, "loss": 0.7935, "step": 17148 }, { "epoch": 0.6057336117868859, "grad_norm": 1.7311789989471436, "learning_rate": 3.551844122194511e-06, "loss": 0.7823, "step": 17149 }, { "epoch": 0.6057689335905938, "grad_norm": 1.5521365404129028, "learning_rate": 3.551296639092826e-06, "loss": 0.8009, "step": 17150 }, { "epoch": 0.6058042553943017, "grad_norm": 1.8423528671264648, "learning_rate": 3.5507491749514984e-06, "loss": 0.7706, "step": 17151 }, { "epoch": 0.6058395771980096, "grad_norm": 1.8198611736297607, "learning_rate": 3.5502017297776924e-06, "loss": 0.7691, "step": 17152 }, { "epoch": 0.6058748990017175, "grad_norm": 1.6941537857055664, "learning_rate": 3.5496543035785748e-06, "loss": 0.8042, "step": 17153 }, { "epoch": 0.6059102208054254, "grad_norm": 1.6011122465133667, "learning_rate": 3.549106896361309e-06, "loss": 0.7253, "step": 17154 }, { "epoch": 0.6059455426091334, "grad_norm": 1.6242066621780396, "learning_rate": 3.548559508133058e-06, "loss": 0.7712, "step": 17155 }, { "epoch": 0.6059808644128413, "grad_norm": 1.687178134918213, "learning_rate": 3.5480121389009893e-06, "loss": 0.7974, "step": 17156 }, { "epoch": 0.6060161862165492, "grad_norm": 1.5937700271606445, "learning_rate": 3.5474647886722635e-06, "loss": 0.801, "step": 17157 }, { "epoch": 0.6060515080202571, "grad_norm": 1.5829886198043823, "learning_rate": 3.5469174574540456e-06, "loss": 0.7375, "step": 17158 }, { "epoch": 0.606086829823965, "grad_norm": 1.91447114944458, "learning_rate": 3.5463701452534992e-06, "loss": 0.7896, "step": 17159 }, { "epoch": 0.6061221516276729, "grad_norm": 1.626920461654663, "learning_rate": 3.545822852077787e-06, "loss": 0.7558, "step": 17160 }, { "epoch": 0.6061574734313808, "grad_norm": 1.5747345685958862, "learning_rate": 3.545275577934072e-06, "loss": 0.7962, "step": 17161 }, { "epoch": 0.6061927952350887, "grad_norm": 1.856824278831482, "learning_rate": 3.544728322829517e-06, "loss": 0.7979, "step": 17162 }, { "epoch": 0.6062281170387965, "grad_norm": 1.6558042764663696, "learning_rate": 3.5441810867712837e-06, "loss": 0.7999, "step": 17163 }, { "epoch": 0.6062634388425044, "grad_norm": 1.771875023841858, "learning_rate": 3.543633869766534e-06, "loss": 0.7674, "step": 17164 }, { "epoch": 0.6062987606462124, "grad_norm": 1.596642255783081, "learning_rate": 3.543086671822432e-06, "loss": 0.7838, "step": 17165 }, { "epoch": 0.6063340824499203, "grad_norm": 1.7381446361541748, "learning_rate": 3.542539492946139e-06, "loss": 0.7915, "step": 17166 }, { "epoch": 0.6063694042536282, "grad_norm": 1.8388104438781738, "learning_rate": 3.541992333144813e-06, "loss": 0.7477, "step": 17167 }, { "epoch": 0.6064047260573361, "grad_norm": 1.5981652736663818, "learning_rate": 3.5414451924256164e-06, "loss": 0.7871, "step": 17168 }, { "epoch": 0.606440047861044, "grad_norm": 1.7078361511230469, "learning_rate": 3.540898070795713e-06, "loss": 0.7846, "step": 17169 }, { "epoch": 0.6064753696647519, "grad_norm": 2.109368324279785, "learning_rate": 3.5403509682622605e-06, "loss": 0.785, "step": 17170 }, { "epoch": 0.6065106914684598, "grad_norm": 1.7166649103164673, "learning_rate": 3.539803884832419e-06, "loss": 0.7744, "step": 17171 }, { "epoch": 0.6065460132721677, "grad_norm": 1.7416480779647827, "learning_rate": 3.5392568205133513e-06, "loss": 0.776, "step": 17172 }, { "epoch": 0.6065813350758756, "grad_norm": 14.641419410705566, "learning_rate": 3.5387097753122156e-06, "loss": 0.7338, "step": 17173 }, { "epoch": 0.6066166568795835, "grad_norm": 2.3765485286712646, "learning_rate": 3.5381627492361703e-06, "loss": 0.8261, "step": 17174 }, { "epoch": 0.6066519786832915, "grad_norm": 1.5989280939102173, "learning_rate": 3.5376157422923775e-06, "loss": 0.7681, "step": 17175 }, { "epoch": 0.6066873004869994, "grad_norm": 1.6681758165359497, "learning_rate": 3.5370687544879947e-06, "loss": 0.8055, "step": 17176 }, { "epoch": 0.6067226222907073, "grad_norm": 1.8393456935882568, "learning_rate": 3.5365217858301805e-06, "loss": 0.7653, "step": 17177 }, { "epoch": 0.6067579440944152, "grad_norm": 1.689245343208313, "learning_rate": 3.535974836326095e-06, "loss": 0.7834, "step": 17178 }, { "epoch": 0.6067932658981231, "grad_norm": 1.8108774423599243, "learning_rate": 3.5354279059828945e-06, "loss": 0.7708, "step": 17179 }, { "epoch": 0.606828587701831, "grad_norm": 1.6175390481948853, "learning_rate": 3.5348809948077388e-06, "loss": 0.7567, "step": 17180 }, { "epoch": 0.6068639095055389, "grad_norm": 1.7513792514801025, "learning_rate": 3.534334102807786e-06, "loss": 0.8045, "step": 17181 }, { "epoch": 0.6068992313092468, "grad_norm": 1.9740393161773682, "learning_rate": 3.533787229990192e-06, "loss": 0.7591, "step": 17182 }, { "epoch": 0.6069345531129547, "grad_norm": 1.7603461742401123, "learning_rate": 3.5332403763621153e-06, "loss": 0.8089, "step": 17183 }, { "epoch": 0.6069698749166627, "grad_norm": 1.6759512424468994, "learning_rate": 3.5326935419307133e-06, "loss": 0.8008, "step": 17184 }, { "epoch": 0.6070051967203706, "grad_norm": 1.5935636758804321, "learning_rate": 3.5321467267031436e-06, "loss": 0.7701, "step": 17185 }, { "epoch": 0.6070405185240785, "grad_norm": 1.6752300262451172, "learning_rate": 3.531599930686559e-06, "loss": 0.8001, "step": 17186 }, { "epoch": 0.6070758403277864, "grad_norm": 1.5836386680603027, "learning_rate": 3.5310531538881197e-06, "loss": 0.7666, "step": 17187 }, { "epoch": 0.6071111621314943, "grad_norm": 1.6044416427612305, "learning_rate": 3.530506396314981e-06, "loss": 0.7455, "step": 17188 }, { "epoch": 0.6071464839352021, "grad_norm": 1.4575787782669067, "learning_rate": 3.5299596579742973e-06, "loss": 0.75, "step": 17189 }, { "epoch": 0.60718180573891, "grad_norm": 1.8718535900115967, "learning_rate": 3.5294129388732255e-06, "loss": 0.7892, "step": 17190 }, { "epoch": 0.6072171275426179, "grad_norm": 1.656981110572815, "learning_rate": 3.528866239018922e-06, "loss": 0.7617, "step": 17191 }, { "epoch": 0.6072524493463258, "grad_norm": 1.5997114181518555, "learning_rate": 3.5283195584185386e-06, "loss": 0.7846, "step": 17192 }, { "epoch": 0.6072877711500337, "grad_norm": 1.5487862825393677, "learning_rate": 3.5277728970792333e-06, "loss": 0.7665, "step": 17193 }, { "epoch": 0.6073230929537417, "grad_norm": 1.715957760810852, "learning_rate": 3.52722625500816e-06, "loss": 0.7891, "step": 17194 }, { "epoch": 0.6073584147574496, "grad_norm": 1.6085591316223145, "learning_rate": 3.5266796322124713e-06, "loss": 0.7681, "step": 17195 }, { "epoch": 0.6073937365611575, "grad_norm": 1.687103033065796, "learning_rate": 3.5261330286993235e-06, "loss": 0.7639, "step": 17196 }, { "epoch": 0.6074290583648654, "grad_norm": 1.7396106719970703, "learning_rate": 3.5255864444758704e-06, "loss": 0.8151, "step": 17197 }, { "epoch": 0.6074643801685733, "grad_norm": 1.6221147775650024, "learning_rate": 3.5250398795492624e-06, "loss": 0.7783, "step": 17198 }, { "epoch": 0.6074997019722812, "grad_norm": 1.871450662612915, "learning_rate": 3.5244933339266576e-06, "loss": 0.7716, "step": 17199 }, { "epoch": 0.6075350237759891, "grad_norm": 1.663593053817749, "learning_rate": 3.5239468076152046e-06, "loss": 0.7667, "step": 17200 }, { "epoch": 0.607570345579697, "grad_norm": 1.6101726293563843, "learning_rate": 3.5234003006220596e-06, "loss": 0.7615, "step": 17201 }, { "epoch": 0.6076056673834049, "grad_norm": 1.6739745140075684, "learning_rate": 3.522853812954374e-06, "loss": 0.7723, "step": 17202 }, { "epoch": 0.6076409891871128, "grad_norm": 1.6187888383865356, "learning_rate": 3.522307344619301e-06, "loss": 0.7659, "step": 17203 }, { "epoch": 0.6076763109908208, "grad_norm": 1.6993728876113892, "learning_rate": 3.5217608956239906e-06, "loss": 0.8198, "step": 17204 }, { "epoch": 0.6077116327945287, "grad_norm": 1.639406442642212, "learning_rate": 3.521214465975595e-06, "loss": 0.8139, "step": 17205 }, { "epoch": 0.6077469545982366, "grad_norm": 1.6231516599655151, "learning_rate": 3.520668055681267e-06, "loss": 0.7707, "step": 17206 }, { "epoch": 0.6077822764019445, "grad_norm": 1.5348292589187622, "learning_rate": 3.5201216647481577e-06, "loss": 0.7749, "step": 17207 }, { "epoch": 0.6078175982056524, "grad_norm": 1.7773395776748657, "learning_rate": 3.519575293183417e-06, "loss": 0.803, "step": 17208 }, { "epoch": 0.6078529200093603, "grad_norm": 1.7746094465255737, "learning_rate": 3.5190289409941974e-06, "loss": 0.7997, "step": 17209 }, { "epoch": 0.6078882418130682, "grad_norm": 1.8668544292449951, "learning_rate": 3.5184826081876477e-06, "loss": 0.7782, "step": 17210 }, { "epoch": 0.6079235636167761, "grad_norm": 2.0181527137756348, "learning_rate": 3.5179362947709206e-06, "loss": 0.7906, "step": 17211 }, { "epoch": 0.607958885420484, "grad_norm": 1.4985289573669434, "learning_rate": 3.517390000751164e-06, "loss": 0.7936, "step": 17212 }, { "epoch": 0.607994207224192, "grad_norm": 1.7537829875946045, "learning_rate": 3.516843726135527e-06, "loss": 0.8097, "step": 17213 }, { "epoch": 0.6080295290278999, "grad_norm": 1.635890245437622, "learning_rate": 3.5162974709311624e-06, "loss": 0.7771, "step": 17214 }, { "epoch": 0.6080648508316077, "grad_norm": 2.2328999042510986, "learning_rate": 3.515751235145217e-06, "loss": 0.7903, "step": 17215 }, { "epoch": 0.6081001726353156, "grad_norm": 0.9545128345489502, "learning_rate": 3.5152050187848396e-06, "loss": 0.5963, "step": 17216 }, { "epoch": 0.6081354944390235, "grad_norm": 1.7080222368240356, "learning_rate": 3.514658821857181e-06, "loss": 0.778, "step": 17217 }, { "epoch": 0.6081708162427314, "grad_norm": 1.594045877456665, "learning_rate": 3.5141126443693886e-06, "loss": 0.7759, "step": 17218 }, { "epoch": 0.6082061380464393, "grad_norm": 1.621244192123413, "learning_rate": 3.513566486328609e-06, "loss": 0.7766, "step": 17219 }, { "epoch": 0.6082414598501472, "grad_norm": 1.4904797077178955, "learning_rate": 3.513020347741993e-06, "loss": 0.7686, "step": 17220 }, { "epoch": 0.6082767816538551, "grad_norm": 1.9095845222473145, "learning_rate": 3.5124742286166878e-06, "loss": 0.773, "step": 17221 }, { "epoch": 0.608312103457563, "grad_norm": 1.501186490058899, "learning_rate": 3.511928128959841e-06, "loss": 0.7961, "step": 17222 }, { "epoch": 0.608347425261271, "grad_norm": 1.6090410947799683, "learning_rate": 3.511382048778597e-06, "loss": 0.7574, "step": 17223 }, { "epoch": 0.6083827470649789, "grad_norm": 0.9767365455627441, "learning_rate": 3.5108359880801056e-06, "loss": 0.6016, "step": 17224 }, { "epoch": 0.6084180688686868, "grad_norm": 2.1464295387268066, "learning_rate": 3.5102899468715133e-06, "loss": 0.7872, "step": 17225 }, { "epoch": 0.6084533906723947, "grad_norm": 1.6915009021759033, "learning_rate": 3.509743925159965e-06, "loss": 0.7738, "step": 17226 }, { "epoch": 0.6084887124761026, "grad_norm": 1.8722021579742432, "learning_rate": 3.5091979229526098e-06, "loss": 0.8143, "step": 17227 }, { "epoch": 0.6085240342798105, "grad_norm": 1.6765363216400146, "learning_rate": 3.508651940256591e-06, "loss": 0.7587, "step": 17228 }, { "epoch": 0.6085593560835184, "grad_norm": 1.723526120185852, "learning_rate": 3.508105977079054e-06, "loss": 0.7645, "step": 17229 }, { "epoch": 0.6085946778872263, "grad_norm": 1.7416915893554688, "learning_rate": 3.507560033427147e-06, "loss": 0.8047, "step": 17230 }, { "epoch": 0.6086299996909342, "grad_norm": 1.663809895515442, "learning_rate": 3.5070141093080132e-06, "loss": 0.7975, "step": 17231 }, { "epoch": 0.6086653214946421, "grad_norm": 1.7212177515029907, "learning_rate": 3.506468204728798e-06, "loss": 0.8035, "step": 17232 }, { "epoch": 0.60870064329835, "grad_norm": 1.8247426748275757, "learning_rate": 3.5059223196966473e-06, "loss": 0.771, "step": 17233 }, { "epoch": 0.608735965102058, "grad_norm": 1.7439523935317993, "learning_rate": 3.505376454218703e-06, "loss": 0.7645, "step": 17234 }, { "epoch": 0.6087712869057659, "grad_norm": 1.530206561088562, "learning_rate": 3.504830608302111e-06, "loss": 0.7543, "step": 17235 }, { "epoch": 0.6088066087094738, "grad_norm": 1.9826085567474365, "learning_rate": 3.504284781954016e-06, "loss": 0.8064, "step": 17236 }, { "epoch": 0.6088419305131817, "grad_norm": 1.6774243116378784, "learning_rate": 3.5037389751815586e-06, "loss": 0.7882, "step": 17237 }, { "epoch": 0.6088772523168896, "grad_norm": 1.6577707529067993, "learning_rate": 3.5031931879918863e-06, "loss": 0.7436, "step": 17238 }, { "epoch": 0.6089125741205975, "grad_norm": 1.6337422132492065, "learning_rate": 3.5026474203921396e-06, "loss": 0.7751, "step": 17239 }, { "epoch": 0.6089478959243054, "grad_norm": 1.575449824333191, "learning_rate": 3.502101672389463e-06, "loss": 0.757, "step": 17240 }, { "epoch": 0.6089832177280132, "grad_norm": 1.4335497617721558, "learning_rate": 3.5015559439909972e-06, "loss": 0.7792, "step": 17241 }, { "epoch": 0.6090185395317211, "grad_norm": 1.6155976057052612, "learning_rate": 3.501010235203884e-06, "loss": 0.7777, "step": 17242 }, { "epoch": 0.609053861335429, "grad_norm": 1.6136186122894287, "learning_rate": 3.5004645460352693e-06, "loss": 0.7877, "step": 17243 }, { "epoch": 0.609089183139137, "grad_norm": 1.5661355257034302, "learning_rate": 3.499918876492291e-06, "loss": 0.8072, "step": 17244 }, { "epoch": 0.6091245049428449, "grad_norm": 1.7400801181793213, "learning_rate": 3.4993732265820933e-06, "loss": 0.7678, "step": 17245 }, { "epoch": 0.6091598267465528, "grad_norm": 3.2655365467071533, "learning_rate": 3.4988275963118174e-06, "loss": 0.7802, "step": 17246 }, { "epoch": 0.6091951485502607, "grad_norm": 1.8989604711532593, "learning_rate": 3.498281985688602e-06, "loss": 0.8106, "step": 17247 }, { "epoch": 0.6092304703539686, "grad_norm": 1.669512391090393, "learning_rate": 3.497736394719591e-06, "loss": 0.7587, "step": 17248 }, { "epoch": 0.6092657921576765, "grad_norm": 1.6379756927490234, "learning_rate": 3.4971908234119236e-06, "loss": 0.796, "step": 17249 }, { "epoch": 0.6093011139613844, "grad_norm": 1.5806273221969604, "learning_rate": 3.4966452717727394e-06, "loss": 0.7469, "step": 17250 }, { "epoch": 0.6093364357650923, "grad_norm": 1.4985345602035522, "learning_rate": 3.49609973980918e-06, "loss": 0.7626, "step": 17251 }, { "epoch": 0.6093717575688002, "grad_norm": 1.570752739906311, "learning_rate": 3.4955542275283848e-06, "loss": 0.7655, "step": 17252 }, { "epoch": 0.6094070793725082, "grad_norm": 1.5675475597381592, "learning_rate": 3.4950087349374915e-06, "loss": 0.7716, "step": 17253 }, { "epoch": 0.6094424011762161, "grad_norm": 1.7177270650863647, "learning_rate": 3.4944632620436426e-06, "loss": 0.7829, "step": 17254 }, { "epoch": 0.609477722979924, "grad_norm": 1.5971510410308838, "learning_rate": 3.4939178088539747e-06, "loss": 0.8034, "step": 17255 }, { "epoch": 0.6095130447836319, "grad_norm": 1.7485300302505493, "learning_rate": 3.4933723753756275e-06, "loss": 0.8207, "step": 17256 }, { "epoch": 0.6095483665873398, "grad_norm": 1.7338017225265503, "learning_rate": 3.4928269616157397e-06, "loss": 0.7854, "step": 17257 }, { "epoch": 0.6095836883910477, "grad_norm": 1.6272075176239014, "learning_rate": 3.492281567581448e-06, "loss": 0.8031, "step": 17258 }, { "epoch": 0.6096190101947556, "grad_norm": 1.5375964641571045, "learning_rate": 3.4917361932798955e-06, "loss": 0.7589, "step": 17259 }, { "epoch": 0.6096543319984635, "grad_norm": 1.8108869791030884, "learning_rate": 3.491190838718213e-06, "loss": 0.7704, "step": 17260 }, { "epoch": 0.6096896538021714, "grad_norm": 1.6881450414657593, "learning_rate": 3.4906455039035424e-06, "loss": 0.7466, "step": 17261 }, { "epoch": 0.6097249756058793, "grad_norm": 1.5531867742538452, "learning_rate": 3.49010018884302e-06, "loss": 0.7579, "step": 17262 }, { "epoch": 0.6097602974095873, "grad_norm": 1.7070622444152832, "learning_rate": 3.48955489354378e-06, "loss": 0.7531, "step": 17263 }, { "epoch": 0.6097956192132952, "grad_norm": 1.6585369110107422, "learning_rate": 3.489009618012964e-06, "loss": 0.8117, "step": 17264 }, { "epoch": 0.6098309410170031, "grad_norm": 1.9082869291305542, "learning_rate": 3.4884643622577054e-06, "loss": 0.7959, "step": 17265 }, { "epoch": 0.609866262820711, "grad_norm": 1.8590320348739624, "learning_rate": 3.4879191262851403e-06, "loss": 0.8408, "step": 17266 }, { "epoch": 0.6099015846244188, "grad_norm": 1.6448071002960205, "learning_rate": 3.487373910102406e-06, "loss": 0.8118, "step": 17267 }, { "epoch": 0.6099369064281267, "grad_norm": 1.7019765377044678, "learning_rate": 3.4868287137166374e-06, "loss": 0.7615, "step": 17268 }, { "epoch": 0.6099722282318346, "grad_norm": 1.6232171058654785, "learning_rate": 3.4862835371349708e-06, "loss": 0.7781, "step": 17269 }, { "epoch": 0.6100075500355425, "grad_norm": 1.570802927017212, "learning_rate": 3.4857383803645408e-06, "loss": 0.7507, "step": 17270 }, { "epoch": 0.6100428718392504, "grad_norm": 1.654136061668396, "learning_rate": 3.485193243412481e-06, "loss": 0.7943, "step": 17271 }, { "epoch": 0.6100781936429583, "grad_norm": 1.6027212142944336, "learning_rate": 3.484648126285929e-06, "loss": 0.767, "step": 17272 }, { "epoch": 0.6101135154466663, "grad_norm": 1.9046396017074585, "learning_rate": 3.4841030289920164e-06, "loss": 0.7805, "step": 17273 }, { "epoch": 0.6101488372503742, "grad_norm": 1.5980561971664429, "learning_rate": 3.4835579515378782e-06, "loss": 0.776, "step": 17274 }, { "epoch": 0.6101841590540821, "grad_norm": 1.6774519681930542, "learning_rate": 3.4830128939306482e-06, "loss": 0.7808, "step": 17275 }, { "epoch": 0.61021948085779, "grad_norm": 1.6892706155776978, "learning_rate": 3.4824678561774614e-06, "loss": 0.8323, "step": 17276 }, { "epoch": 0.6102548026614979, "grad_norm": 1.4925469160079956, "learning_rate": 3.4819228382854515e-06, "loss": 0.7482, "step": 17277 }, { "epoch": 0.6102901244652058, "grad_norm": 1.731372594833374, "learning_rate": 3.481377840261747e-06, "loss": 0.7813, "step": 17278 }, { "epoch": 0.6103254462689137, "grad_norm": 1.6242815256118774, "learning_rate": 3.4808328621134857e-06, "loss": 0.761, "step": 17279 }, { "epoch": 0.6103607680726216, "grad_norm": 1.7497869729995728, "learning_rate": 3.4802879038477976e-06, "loss": 0.8003, "step": 17280 }, { "epoch": 0.6103960898763295, "grad_norm": 1.6251869201660156, "learning_rate": 3.4797429654718158e-06, "loss": 0.751, "step": 17281 }, { "epoch": 0.6104314116800375, "grad_norm": 1.9850691556930542, "learning_rate": 3.479198046992672e-06, "loss": 0.8088, "step": 17282 }, { "epoch": 0.6104667334837454, "grad_norm": 1.9065924882888794, "learning_rate": 3.478653148417499e-06, "loss": 0.7869, "step": 17283 }, { "epoch": 0.6105020552874533, "grad_norm": 1.585633397102356, "learning_rate": 3.478108269753427e-06, "loss": 0.784, "step": 17284 }, { "epoch": 0.6105373770911612, "grad_norm": 1.789764642715454, "learning_rate": 3.4775634110075885e-06, "loss": 0.7874, "step": 17285 }, { "epoch": 0.6105726988948691, "grad_norm": 1.5741771459579468, "learning_rate": 3.4770185721871138e-06, "loss": 0.7838, "step": 17286 }, { "epoch": 0.610608020698577, "grad_norm": 1.7029813528060913, "learning_rate": 3.4764737532991323e-06, "loss": 0.77, "step": 17287 }, { "epoch": 0.6106433425022849, "grad_norm": 1.8116267919540405, "learning_rate": 3.4759289543507773e-06, "loss": 0.7711, "step": 17288 }, { "epoch": 0.6106786643059928, "grad_norm": 1.8613001108169556, "learning_rate": 3.475384175349178e-06, "loss": 0.7696, "step": 17289 }, { "epoch": 0.6107139861097007, "grad_norm": 1.6288890838623047, "learning_rate": 3.4748394163014624e-06, "loss": 0.7854, "step": 17290 }, { "epoch": 0.6107493079134086, "grad_norm": 1.6452603340148926, "learning_rate": 3.4742946772147635e-06, "loss": 0.7717, "step": 17291 }, { "epoch": 0.6107846297171166, "grad_norm": 1.7622580528259277, "learning_rate": 3.4737499580962073e-06, "loss": 0.8214, "step": 17292 }, { "epoch": 0.6108199515208244, "grad_norm": 0.9786136150360107, "learning_rate": 3.4732052589529263e-06, "loss": 0.5996, "step": 17293 }, { "epoch": 0.6108552733245323, "grad_norm": 2.0386674404144287, "learning_rate": 3.4726605797920474e-06, "loss": 0.771, "step": 17294 }, { "epoch": 0.6108905951282402, "grad_norm": 1.9061237573623657, "learning_rate": 3.4721159206206993e-06, "loss": 0.7721, "step": 17295 }, { "epoch": 0.6109259169319481, "grad_norm": 1.6877914667129517, "learning_rate": 3.4715712814460135e-06, "loss": 0.8041, "step": 17296 }, { "epoch": 0.610961238735656, "grad_norm": 1.5398410558700562, "learning_rate": 3.4710266622751127e-06, "loss": 0.7349, "step": 17297 }, { "epoch": 0.6109965605393639, "grad_norm": 1.6052725315093994, "learning_rate": 3.470482063115128e-06, "loss": 0.7656, "step": 17298 }, { "epoch": 0.6110318823430718, "grad_norm": 1.725760817527771, "learning_rate": 3.4699374839731868e-06, "loss": 0.7981, "step": 17299 }, { "epoch": 0.6110672041467797, "grad_norm": 4.9678826332092285, "learning_rate": 3.4693929248564155e-06, "loss": 0.736, "step": 17300 }, { "epoch": 0.6111025259504876, "grad_norm": 1.5712898969650269, "learning_rate": 3.468848385771943e-06, "loss": 0.7782, "step": 17301 }, { "epoch": 0.6111378477541956, "grad_norm": 1.9764922857284546, "learning_rate": 3.468303866726893e-06, "loss": 0.8128, "step": 17302 }, { "epoch": 0.6111731695579035, "grad_norm": 2.103855848312378, "learning_rate": 3.467759367728396e-06, "loss": 0.8058, "step": 17303 }, { "epoch": 0.6112084913616114, "grad_norm": 1.7429554462432861, "learning_rate": 3.4672148887835764e-06, "loss": 0.7566, "step": 17304 }, { "epoch": 0.6112438131653193, "grad_norm": 1.6310276985168457, "learning_rate": 3.466670429899559e-06, "loss": 0.7785, "step": 17305 }, { "epoch": 0.6112791349690272, "grad_norm": 1.7055453062057495, "learning_rate": 3.466125991083471e-06, "loss": 0.7521, "step": 17306 }, { "epoch": 0.6113144567727351, "grad_norm": 1.8281846046447754, "learning_rate": 3.465581572342439e-06, "loss": 0.7879, "step": 17307 }, { "epoch": 0.611349778576443, "grad_norm": 1.6060765981674194, "learning_rate": 3.465037173683585e-06, "loss": 0.817, "step": 17308 }, { "epoch": 0.6113851003801509, "grad_norm": 1.7030199766159058, "learning_rate": 3.464492795114037e-06, "loss": 0.7822, "step": 17309 }, { "epoch": 0.6114204221838588, "grad_norm": 1.702248215675354, "learning_rate": 3.4639484366409193e-06, "loss": 0.8062, "step": 17310 }, { "epoch": 0.6114557439875667, "grad_norm": 1.6397697925567627, "learning_rate": 3.463404098271354e-06, "loss": 0.7777, "step": 17311 }, { "epoch": 0.6114910657912747, "grad_norm": 1.7239125967025757, "learning_rate": 3.4628597800124685e-06, "loss": 0.7951, "step": 17312 }, { "epoch": 0.6115263875949826, "grad_norm": 1.6949663162231445, "learning_rate": 3.462315481871385e-06, "loss": 0.7644, "step": 17313 }, { "epoch": 0.6115617093986905, "grad_norm": 1.5534334182739258, "learning_rate": 3.4617712038552287e-06, "loss": 0.7936, "step": 17314 }, { "epoch": 0.6115970312023984, "grad_norm": 1.7952134609222412, "learning_rate": 3.461226945971119e-06, "loss": 0.761, "step": 17315 }, { "epoch": 0.6116323530061063, "grad_norm": 1.7113538980484009, "learning_rate": 3.4606827082261833e-06, "loss": 0.787, "step": 17316 }, { "epoch": 0.6116676748098142, "grad_norm": 6.646385669708252, "learning_rate": 3.460138490627543e-06, "loss": 0.7955, "step": 17317 }, { "epoch": 0.6117029966135221, "grad_norm": 1.5346665382385254, "learning_rate": 3.4595942931823203e-06, "loss": 0.769, "step": 17318 }, { "epoch": 0.6117383184172299, "grad_norm": 1.8214222192764282, "learning_rate": 3.4590501158976377e-06, "loss": 0.8122, "step": 17319 }, { "epoch": 0.6117736402209378, "grad_norm": 1.948533058166504, "learning_rate": 3.458505958780618e-06, "loss": 0.7861, "step": 17320 }, { "epoch": 0.6118089620246457, "grad_norm": 1.4495768547058105, "learning_rate": 3.4579618218383814e-06, "loss": 0.7517, "step": 17321 }, { "epoch": 0.6118442838283537, "grad_norm": 1.5762532949447632, "learning_rate": 3.4574177050780523e-06, "loss": 0.7751, "step": 17322 }, { "epoch": 0.6118796056320616, "grad_norm": 1.5466538667678833, "learning_rate": 3.4568736085067496e-06, "loss": 0.7827, "step": 17323 }, { "epoch": 0.6119149274357695, "grad_norm": 1.6471883058547974, "learning_rate": 3.4563295321315936e-06, "loss": 0.8112, "step": 17324 }, { "epoch": 0.6119502492394774, "grad_norm": 1.6985474824905396, "learning_rate": 3.4557854759597086e-06, "loss": 0.7773, "step": 17325 }, { "epoch": 0.6119855710431853, "grad_norm": 1.7251378297805786, "learning_rate": 3.4552414399982127e-06, "loss": 0.7643, "step": 17326 }, { "epoch": 0.6120208928468932, "grad_norm": 1.6767890453338623, "learning_rate": 3.454697424254225e-06, "loss": 0.8295, "step": 17327 }, { "epoch": 0.6120562146506011, "grad_norm": 1.5292937755584717, "learning_rate": 3.4541534287348688e-06, "loss": 0.7759, "step": 17328 }, { "epoch": 0.612091536454309, "grad_norm": 1.596621036529541, "learning_rate": 3.45360945344726e-06, "loss": 0.7871, "step": 17329 }, { "epoch": 0.6121268582580169, "grad_norm": 2.060114622116089, "learning_rate": 3.4530654983985214e-06, "loss": 0.7955, "step": 17330 }, { "epoch": 0.6121621800617248, "grad_norm": 1.7317609786987305, "learning_rate": 3.4525215635957707e-06, "loss": 0.7739, "step": 17331 }, { "epoch": 0.6121975018654328, "grad_norm": 1.6881060600280762, "learning_rate": 3.4519776490461255e-06, "loss": 0.7942, "step": 17332 }, { "epoch": 0.6122328236691407, "grad_norm": 1.753786325454712, "learning_rate": 3.4514337547567095e-06, "loss": 0.7652, "step": 17333 }, { "epoch": 0.6122681454728486, "grad_norm": 1.575643539428711, "learning_rate": 3.450889880734634e-06, "loss": 0.7913, "step": 17334 }, { "epoch": 0.6123034672765565, "grad_norm": 1.8008122444152832, "learning_rate": 3.4503460269870214e-06, "loss": 0.802, "step": 17335 }, { "epoch": 0.6123387890802644, "grad_norm": 1.582229733467102, "learning_rate": 3.449802193520989e-06, "loss": 0.7658, "step": 17336 }, { "epoch": 0.6123741108839723, "grad_norm": 1.5982980728149414, "learning_rate": 3.4492583803436535e-06, "loss": 0.7315, "step": 17337 }, { "epoch": 0.6124094326876802, "grad_norm": 1.650912880897522, "learning_rate": 3.448714587462133e-06, "loss": 0.7503, "step": 17338 }, { "epoch": 0.6124447544913881, "grad_norm": 1.6382372379302979, "learning_rate": 3.448170814883544e-06, "loss": 0.7867, "step": 17339 }, { "epoch": 0.612480076295096, "grad_norm": 1.7581769227981567, "learning_rate": 3.4476270626150043e-06, "loss": 0.7866, "step": 17340 }, { "epoch": 0.612515398098804, "grad_norm": 1.7443983554840088, "learning_rate": 3.44708333066363e-06, "loss": 0.7675, "step": 17341 }, { "epoch": 0.6125507199025119, "grad_norm": 1.5780078172683716, "learning_rate": 3.446539619036536e-06, "loss": 0.7579, "step": 17342 }, { "epoch": 0.6125860417062198, "grad_norm": 1.9119889736175537, "learning_rate": 3.4459959277408407e-06, "loss": 0.7662, "step": 17343 }, { "epoch": 0.6126213635099277, "grad_norm": 2.3706371784210205, "learning_rate": 3.4454522567836583e-06, "loss": 0.7646, "step": 17344 }, { "epoch": 0.6126566853136356, "grad_norm": 1.615858793258667, "learning_rate": 3.444908606172104e-06, "loss": 0.7673, "step": 17345 }, { "epoch": 0.6126920071173434, "grad_norm": 1.6540899276733398, "learning_rate": 3.4443649759132935e-06, "loss": 0.7695, "step": 17346 }, { "epoch": 0.6127273289210513, "grad_norm": 1.7244824171066284, "learning_rate": 3.4438213660143426e-06, "loss": 0.778, "step": 17347 }, { "epoch": 0.6127626507247592, "grad_norm": 1.6224218606948853, "learning_rate": 3.4432777764823645e-06, "loss": 0.7886, "step": 17348 }, { "epoch": 0.6127979725284671, "grad_norm": 1.513098955154419, "learning_rate": 3.442734207324474e-06, "loss": 0.8018, "step": 17349 }, { "epoch": 0.612833294332175, "grad_norm": 2.0804877281188965, "learning_rate": 3.442190658547786e-06, "loss": 0.7603, "step": 17350 }, { "epoch": 0.612868616135883, "grad_norm": 1.7992631196975708, "learning_rate": 3.4416471301594157e-06, "loss": 0.8123, "step": 17351 }, { "epoch": 0.6129039379395909, "grad_norm": 1.697046160697937, "learning_rate": 3.441103622166472e-06, "loss": 0.7882, "step": 17352 }, { "epoch": 0.6129392597432988, "grad_norm": 1.6938331127166748, "learning_rate": 3.440560134576072e-06, "loss": 0.7971, "step": 17353 }, { "epoch": 0.6129745815470067, "grad_norm": 1.6879104375839233, "learning_rate": 3.4400166673953277e-06, "loss": 0.8099, "step": 17354 }, { "epoch": 0.6130099033507146, "grad_norm": 1.7644673585891724, "learning_rate": 3.4394732206313514e-06, "loss": 0.7979, "step": 17355 }, { "epoch": 0.6130452251544225, "grad_norm": 1.9142978191375732, "learning_rate": 3.438929794291257e-06, "loss": 0.7811, "step": 17356 }, { "epoch": 0.6130805469581304, "grad_norm": 1.7695883512496948, "learning_rate": 3.438386388382156e-06, "loss": 0.8043, "step": 17357 }, { "epoch": 0.6131158687618383, "grad_norm": 1.5464808940887451, "learning_rate": 3.43784300291116e-06, "loss": 0.7664, "step": 17358 }, { "epoch": 0.6131511905655462, "grad_norm": 1.5575757026672363, "learning_rate": 3.437299637885381e-06, "loss": 0.7857, "step": 17359 }, { "epoch": 0.6131865123692541, "grad_norm": 1.6908432245254517, "learning_rate": 3.4367562933119313e-06, "loss": 0.7851, "step": 17360 }, { "epoch": 0.613221834172962, "grad_norm": 2.6504619121551514, "learning_rate": 3.436212969197921e-06, "loss": 0.7975, "step": 17361 }, { "epoch": 0.61325715597667, "grad_norm": 0.9852632880210876, "learning_rate": 3.4356696655504613e-06, "loss": 0.5854, "step": 17362 }, { "epoch": 0.6132924777803779, "grad_norm": 1.852499008178711, "learning_rate": 3.435126382376662e-06, "loss": 0.7903, "step": 17363 }, { "epoch": 0.6133277995840858, "grad_norm": 1.7836506366729736, "learning_rate": 3.434583119683636e-06, "loss": 0.7852, "step": 17364 }, { "epoch": 0.6133631213877937, "grad_norm": 1.6430237293243408, "learning_rate": 3.4340398774784917e-06, "loss": 0.7531, "step": 17365 }, { "epoch": 0.6133984431915016, "grad_norm": 1.6017963886260986, "learning_rate": 3.433496655768338e-06, "loss": 0.7762, "step": 17366 }, { "epoch": 0.6134337649952095, "grad_norm": 2.148587465286255, "learning_rate": 3.432953454560287e-06, "loss": 0.7741, "step": 17367 }, { "epoch": 0.6134690867989174, "grad_norm": 1.8020251989364624, "learning_rate": 3.4324102738614463e-06, "loss": 0.7824, "step": 17368 }, { "epoch": 0.6135044086026253, "grad_norm": 0.9939335584640503, "learning_rate": 3.4318671136789237e-06, "loss": 0.5941, "step": 17369 }, { "epoch": 0.6135397304063333, "grad_norm": 1.5521421432495117, "learning_rate": 3.4313239740198333e-06, "loss": 0.75, "step": 17370 }, { "epoch": 0.6135750522100412, "grad_norm": 1.5644667148590088, "learning_rate": 3.430780854891277e-06, "loss": 0.8495, "step": 17371 }, { "epoch": 0.613610374013749, "grad_norm": 1.6465650796890259, "learning_rate": 3.4302377563003663e-06, "loss": 0.7821, "step": 17372 }, { "epoch": 0.6136456958174569, "grad_norm": 1.6421507596969604, "learning_rate": 3.429694678254208e-06, "loss": 0.807, "step": 17373 }, { "epoch": 0.6136810176211648, "grad_norm": 1.6829270124435425, "learning_rate": 3.4291516207599118e-06, "loss": 0.8029, "step": 17374 }, { "epoch": 0.6137163394248727, "grad_norm": 1.7140891551971436, "learning_rate": 3.428608583824583e-06, "loss": 0.7675, "step": 17375 }, { "epoch": 0.6137516612285806, "grad_norm": 1.5655937194824219, "learning_rate": 3.4280655674553297e-06, "loss": 0.7909, "step": 17376 }, { "epoch": 0.6137869830322885, "grad_norm": 1.5918598175048828, "learning_rate": 3.4275225716592598e-06, "loss": 0.77, "step": 17377 }, { "epoch": 0.6138223048359964, "grad_norm": 1.6564550399780273, "learning_rate": 3.426979596443478e-06, "loss": 0.7813, "step": 17378 }, { "epoch": 0.6138576266397043, "grad_norm": 1.6006455421447754, "learning_rate": 3.4264366418150907e-06, "loss": 0.778, "step": 17379 }, { "epoch": 0.6138929484434122, "grad_norm": 1.595308780670166, "learning_rate": 3.4258937077812065e-06, "loss": 0.8002, "step": 17380 }, { "epoch": 0.6139282702471202, "grad_norm": 1.6881132125854492, "learning_rate": 3.4253507943489296e-06, "loss": 0.826, "step": 17381 }, { "epoch": 0.6139635920508281, "grad_norm": 1.577313780784607, "learning_rate": 3.4248079015253633e-06, "loss": 0.7697, "step": 17382 }, { "epoch": 0.613998913854536, "grad_norm": 1.6155576705932617, "learning_rate": 3.424265029317617e-06, "loss": 0.7895, "step": 17383 }, { "epoch": 0.6140342356582439, "grad_norm": 1.8084725141525269, "learning_rate": 3.423722177732794e-06, "loss": 0.7791, "step": 17384 }, { "epoch": 0.6140695574619518, "grad_norm": 1.8453863859176636, "learning_rate": 3.423179346777997e-06, "loss": 0.8098, "step": 17385 }, { "epoch": 0.6141048792656597, "grad_norm": 1.7650219202041626, "learning_rate": 3.422636536460334e-06, "loss": 0.7635, "step": 17386 }, { "epoch": 0.6141402010693676, "grad_norm": 1.9590116739273071, "learning_rate": 3.4220937467869065e-06, "loss": 0.7965, "step": 17387 }, { "epoch": 0.6141755228730755, "grad_norm": 2.0204076766967773, "learning_rate": 3.4215509777648224e-06, "loss": 0.7585, "step": 17388 }, { "epoch": 0.6142108446767834, "grad_norm": 1.8842933177947998, "learning_rate": 3.4210082294011794e-06, "loss": 0.8055, "step": 17389 }, { "epoch": 0.6142461664804914, "grad_norm": 1.6989566087722778, "learning_rate": 3.420465501703084e-06, "loss": 0.7786, "step": 17390 }, { "epoch": 0.6142814882841993, "grad_norm": 1.6098113059997559, "learning_rate": 3.4199227946776403e-06, "loss": 0.7747, "step": 17391 }, { "epoch": 0.6143168100879072, "grad_norm": 1.6948577165603638, "learning_rate": 3.419380108331949e-06, "loss": 0.8183, "step": 17392 }, { "epoch": 0.6143521318916151, "grad_norm": 1.6895028352737427, "learning_rate": 3.418837442673114e-06, "loss": 0.7709, "step": 17393 }, { "epoch": 0.614387453695323, "grad_norm": 1.7341206073760986, "learning_rate": 3.4182947977082377e-06, "loss": 0.8, "step": 17394 }, { "epoch": 0.6144227754990309, "grad_norm": 1.843869686126709, "learning_rate": 3.417752173444421e-06, "loss": 0.7397, "step": 17395 }, { "epoch": 0.6144580973027388, "grad_norm": 1.5941941738128662, "learning_rate": 3.417209569888767e-06, "loss": 0.7886, "step": 17396 }, { "epoch": 0.6144934191064467, "grad_norm": 1.7446038722991943, "learning_rate": 3.4166669870483758e-06, "loss": 0.7756, "step": 17397 }, { "epoch": 0.6145287409101545, "grad_norm": 1.6632475852966309, "learning_rate": 3.4161244249303504e-06, "loss": 0.7693, "step": 17398 }, { "epoch": 0.6145640627138624, "grad_norm": 1.5770795345306396, "learning_rate": 3.415581883541791e-06, "loss": 0.7646, "step": 17399 }, { "epoch": 0.6145993845175703, "grad_norm": 1.7012194395065308, "learning_rate": 3.4150393628897964e-06, "loss": 0.7855, "step": 17400 }, { "epoch": 0.6146347063212783, "grad_norm": 1.7025011777877808, "learning_rate": 3.41449686298147e-06, "loss": 0.7656, "step": 17401 }, { "epoch": 0.6146700281249862, "grad_norm": 1.679673194885254, "learning_rate": 3.4139543838239107e-06, "loss": 0.778, "step": 17402 }, { "epoch": 0.6147053499286941, "grad_norm": 1.563668131828308, "learning_rate": 3.4134119254242176e-06, "loss": 0.7727, "step": 17403 }, { "epoch": 0.614740671732402, "grad_norm": 1.6724287271499634, "learning_rate": 3.412869487789492e-06, "loss": 0.7893, "step": 17404 }, { "epoch": 0.6147759935361099, "grad_norm": 1.6734726428985596, "learning_rate": 3.412327070926832e-06, "loss": 0.7656, "step": 17405 }, { "epoch": 0.6148113153398178, "grad_norm": 2.13275146484375, "learning_rate": 3.4117846748433354e-06, "loss": 0.7835, "step": 17406 }, { "epoch": 0.6148466371435257, "grad_norm": 1.6101678609848022, "learning_rate": 3.411242299546105e-06, "loss": 0.8003, "step": 17407 }, { "epoch": 0.6148819589472336, "grad_norm": 1.892296552658081, "learning_rate": 3.410699945042235e-06, "loss": 0.8044, "step": 17408 }, { "epoch": 0.6149172807509415, "grad_norm": 1.6941373348236084, "learning_rate": 3.4101576113388264e-06, "loss": 0.7783, "step": 17409 }, { "epoch": 0.6149526025546495, "grad_norm": 1.6792898178100586, "learning_rate": 3.4096152984429742e-06, "loss": 0.7849, "step": 17410 }, { "epoch": 0.6149879243583574, "grad_norm": 1.5798060894012451, "learning_rate": 3.4090730063617794e-06, "loss": 0.7557, "step": 17411 }, { "epoch": 0.6150232461620653, "grad_norm": 1.6042494773864746, "learning_rate": 3.4085307351023384e-06, "loss": 0.7624, "step": 17412 }, { "epoch": 0.6150585679657732, "grad_norm": 1.7386243343353271, "learning_rate": 3.4079884846717466e-06, "loss": 0.7761, "step": 17413 }, { "epoch": 0.6150938897694811, "grad_norm": 1.7833751440048218, "learning_rate": 3.4074462550771032e-06, "loss": 0.7566, "step": 17414 }, { "epoch": 0.615129211573189, "grad_norm": 1.6162526607513428, "learning_rate": 3.406904046325504e-06, "loss": 0.7952, "step": 17415 }, { "epoch": 0.6151645333768969, "grad_norm": 1.5489389896392822, "learning_rate": 3.406361858424044e-06, "loss": 0.7765, "step": 17416 }, { "epoch": 0.6151998551806048, "grad_norm": 1.6191409826278687, "learning_rate": 3.4058196913798215e-06, "loss": 0.8108, "step": 17417 }, { "epoch": 0.6152351769843127, "grad_norm": 2.122635841369629, "learning_rate": 3.405277545199931e-06, "loss": 0.7848, "step": 17418 }, { "epoch": 0.6152704987880206, "grad_norm": 1.640629768371582, "learning_rate": 3.404735419891467e-06, "loss": 0.7975, "step": 17419 }, { "epoch": 0.6153058205917286, "grad_norm": 1.889140248298645, "learning_rate": 3.4041933154615283e-06, "loss": 0.7849, "step": 17420 }, { "epoch": 0.6153411423954365, "grad_norm": 1.6441364288330078, "learning_rate": 3.4036512319172053e-06, "loss": 0.796, "step": 17421 }, { "epoch": 0.6153764641991444, "grad_norm": 1.708330512046814, "learning_rate": 3.403109169265596e-06, "loss": 0.7818, "step": 17422 }, { "epoch": 0.6154117860028523, "grad_norm": 0.950023353099823, "learning_rate": 3.402567127513794e-06, "loss": 0.5917, "step": 17423 }, { "epoch": 0.6154471078065601, "grad_norm": 2.1557745933532715, "learning_rate": 3.4020251066688926e-06, "loss": 0.8294, "step": 17424 }, { "epoch": 0.615482429610268, "grad_norm": 1.6697642803192139, "learning_rate": 3.4014831067379883e-06, "loss": 0.7916, "step": 17425 }, { "epoch": 0.6155177514139759, "grad_norm": 1.6872954368591309, "learning_rate": 3.4009411277281702e-06, "loss": 0.7855, "step": 17426 }, { "epoch": 0.6155530732176838, "grad_norm": 1.6964716911315918, "learning_rate": 3.4003991696465343e-06, "loss": 0.7908, "step": 17427 }, { "epoch": 0.6155883950213917, "grad_norm": 1.6128708124160767, "learning_rate": 3.3998572325001744e-06, "loss": 0.7476, "step": 17428 }, { "epoch": 0.6156237168250996, "grad_norm": 1.7161532640457153, "learning_rate": 3.39931531629618e-06, "loss": 0.783, "step": 17429 }, { "epoch": 0.6156590386288076, "grad_norm": 1.6004801988601685, "learning_rate": 3.3987734210416478e-06, "loss": 0.7518, "step": 17430 }, { "epoch": 0.6156943604325155, "grad_norm": 1.5496538877487183, "learning_rate": 3.398231546743667e-06, "loss": 0.7783, "step": 17431 }, { "epoch": 0.6157296822362234, "grad_norm": 1.7454512119293213, "learning_rate": 3.397689693409332e-06, "loss": 0.7541, "step": 17432 }, { "epoch": 0.6157650040399313, "grad_norm": 1.6691428422927856, "learning_rate": 3.397147861045732e-06, "loss": 0.7598, "step": 17433 }, { "epoch": 0.6158003258436392, "grad_norm": 0.9790822863578796, "learning_rate": 3.396606049659959e-06, "loss": 0.5954, "step": 17434 }, { "epoch": 0.6158356476473471, "grad_norm": 1.6238752603530884, "learning_rate": 3.3960642592591057e-06, "loss": 0.7715, "step": 17435 }, { "epoch": 0.615870969451055, "grad_norm": 1.6310094594955444, "learning_rate": 3.395522489850262e-06, "loss": 0.7821, "step": 17436 }, { "epoch": 0.6159062912547629, "grad_norm": 1.595625400543213, "learning_rate": 3.394980741440517e-06, "loss": 0.7759, "step": 17437 }, { "epoch": 0.6159416130584708, "grad_norm": 1.545608639717102, "learning_rate": 3.394439014036963e-06, "loss": 0.7611, "step": 17438 }, { "epoch": 0.6159769348621787, "grad_norm": 1.6341021060943604, "learning_rate": 3.393897307646691e-06, "loss": 0.7612, "step": 17439 }, { "epoch": 0.6160122566658867, "grad_norm": 1.6478153467178345, "learning_rate": 3.393355622276786e-06, "loss": 0.7639, "step": 17440 }, { "epoch": 0.6160475784695946, "grad_norm": 1.6519365310668945, "learning_rate": 3.3928139579343435e-06, "loss": 0.766, "step": 17441 }, { "epoch": 0.6160829002733025, "grad_norm": 0.9593793153762817, "learning_rate": 3.3922723146264485e-06, "loss": 0.5486, "step": 17442 }, { "epoch": 0.6161182220770104, "grad_norm": 1.8198349475860596, "learning_rate": 3.3917306923601927e-06, "loss": 0.776, "step": 17443 }, { "epoch": 0.6161535438807183, "grad_norm": 1.8672188520431519, "learning_rate": 3.3911890911426617e-06, "loss": 0.7655, "step": 17444 }, { "epoch": 0.6161888656844262, "grad_norm": 1.6197574138641357, "learning_rate": 3.3906475109809454e-06, "loss": 0.7559, "step": 17445 }, { "epoch": 0.6162241874881341, "grad_norm": 1.656765103340149, "learning_rate": 3.3901059518821323e-06, "loss": 0.7886, "step": 17446 }, { "epoch": 0.616259509291842, "grad_norm": 1.8053261041641235, "learning_rate": 3.389564413853309e-06, "loss": 0.8016, "step": 17447 }, { "epoch": 0.6162948310955499, "grad_norm": 1.7215666770935059, "learning_rate": 3.3890228969015655e-06, "loss": 0.8196, "step": 17448 }, { "epoch": 0.6163301528992579, "grad_norm": 1.7005647420883179, "learning_rate": 3.3884814010339872e-06, "loss": 0.7935, "step": 17449 }, { "epoch": 0.6163654747029657, "grad_norm": 1.5379884243011475, "learning_rate": 3.3879399262576594e-06, "loss": 0.7687, "step": 17450 }, { "epoch": 0.6164007965066736, "grad_norm": 1.6321674585342407, "learning_rate": 3.387398472579673e-06, "loss": 0.8107, "step": 17451 }, { "epoch": 0.6164361183103815, "grad_norm": 1.8255369663238525, "learning_rate": 3.3868570400071123e-06, "loss": 0.7952, "step": 17452 }, { "epoch": 0.6164714401140894, "grad_norm": 1.7108147144317627, "learning_rate": 3.3863156285470618e-06, "loss": 0.7845, "step": 17453 }, { "epoch": 0.6165067619177973, "grad_norm": 1.7781250476837158, "learning_rate": 3.38577423820661e-06, "loss": 0.8206, "step": 17454 }, { "epoch": 0.6165420837215052, "grad_norm": 1.7765179872512817, "learning_rate": 3.3852328689928404e-06, "loss": 0.7914, "step": 17455 }, { "epoch": 0.6165774055252131, "grad_norm": 1.5924229621887207, "learning_rate": 3.384691520912841e-06, "loss": 0.7604, "step": 17456 }, { "epoch": 0.616612727328921, "grad_norm": 1.6067509651184082, "learning_rate": 3.3841501939736955e-06, "loss": 0.79, "step": 17457 }, { "epoch": 0.6166480491326289, "grad_norm": 1.653243064880371, "learning_rate": 3.383608888182487e-06, "loss": 0.7921, "step": 17458 }, { "epoch": 0.6166833709363368, "grad_norm": 1.747006893157959, "learning_rate": 3.383067603546303e-06, "loss": 0.7871, "step": 17459 }, { "epoch": 0.6167186927400448, "grad_norm": 1.6274733543395996, "learning_rate": 3.3825263400722253e-06, "loss": 0.7794, "step": 17460 }, { "epoch": 0.6167540145437527, "grad_norm": 1.7168160676956177, "learning_rate": 3.3819850977673386e-06, "loss": 0.7897, "step": 17461 }, { "epoch": 0.6167893363474606, "grad_norm": 1.6502302885055542, "learning_rate": 3.3814438766387296e-06, "loss": 0.7904, "step": 17462 }, { "epoch": 0.6168246581511685, "grad_norm": 1.6646292209625244, "learning_rate": 3.380902676693476e-06, "loss": 0.7725, "step": 17463 }, { "epoch": 0.6168599799548764, "grad_norm": 1.750789999961853, "learning_rate": 3.3803614979386644e-06, "loss": 0.7657, "step": 17464 }, { "epoch": 0.6168953017585843, "grad_norm": 1.6903141736984253, "learning_rate": 3.379820340381377e-06, "loss": 0.773, "step": 17465 }, { "epoch": 0.6169306235622922, "grad_norm": 1.6283490657806396, "learning_rate": 3.3792792040286966e-06, "loss": 0.7734, "step": 17466 }, { "epoch": 0.6169659453660001, "grad_norm": 1.672713279724121, "learning_rate": 3.3787380888877054e-06, "loss": 0.7936, "step": 17467 }, { "epoch": 0.617001267169708, "grad_norm": 1.7613601684570312, "learning_rate": 3.3781969949654846e-06, "loss": 0.7957, "step": 17468 }, { "epoch": 0.617036588973416, "grad_norm": 1.688522458076477, "learning_rate": 3.3776559222691175e-06, "loss": 0.8044, "step": 17469 }, { "epoch": 0.6170719107771239, "grad_norm": 1.6382522583007812, "learning_rate": 3.3771148708056844e-06, "loss": 0.7869, "step": 17470 }, { "epoch": 0.6171072325808318, "grad_norm": 1.6200698614120483, "learning_rate": 3.376573840582266e-06, "loss": 0.7906, "step": 17471 }, { "epoch": 0.6171425543845397, "grad_norm": 1.936856746673584, "learning_rate": 3.3760328316059455e-06, "loss": 0.7705, "step": 17472 }, { "epoch": 0.6171778761882476, "grad_norm": 1.6332694292068481, "learning_rate": 3.3754918438838012e-06, "loss": 0.7566, "step": 17473 }, { "epoch": 0.6172131979919555, "grad_norm": 1.7030160427093506, "learning_rate": 3.374950877422914e-06, "loss": 0.7768, "step": 17474 }, { "epoch": 0.6172485197956634, "grad_norm": 1.6800341606140137, "learning_rate": 3.374409932230365e-06, "loss": 0.7688, "step": 17475 }, { "epoch": 0.6172838415993712, "grad_norm": 1.577562928199768, "learning_rate": 3.373869008313233e-06, "loss": 0.7554, "step": 17476 }, { "epoch": 0.6173191634030791, "grad_norm": 1.7536648511886597, "learning_rate": 3.3733281056785972e-06, "loss": 0.8241, "step": 17477 }, { "epoch": 0.617354485206787, "grad_norm": 1.5647283792495728, "learning_rate": 3.372787224333538e-06, "loss": 0.7443, "step": 17478 }, { "epoch": 0.617389807010495, "grad_norm": 1.8534376621246338, "learning_rate": 3.3722463642851334e-06, "loss": 0.8257, "step": 17479 }, { "epoch": 0.6174251288142029, "grad_norm": 1.9504849910736084, "learning_rate": 3.371705525540464e-06, "loss": 0.8106, "step": 17480 }, { "epoch": 0.6174604506179108, "grad_norm": 1.5857698917388916, "learning_rate": 3.3711647081066045e-06, "loss": 0.7722, "step": 17481 }, { "epoch": 0.6174957724216187, "grad_norm": 3.262404203414917, "learning_rate": 3.3706239119906364e-06, "loss": 0.7693, "step": 17482 }, { "epoch": 0.6175310942253266, "grad_norm": 1.674242615699768, "learning_rate": 3.3700831371996358e-06, "loss": 0.7897, "step": 17483 }, { "epoch": 0.6175664160290345, "grad_norm": 1.6469084024429321, "learning_rate": 3.3695423837406797e-06, "loss": 0.7735, "step": 17484 }, { "epoch": 0.6176017378327424, "grad_norm": 1.5391900539398193, "learning_rate": 3.369001651620848e-06, "loss": 0.7709, "step": 17485 }, { "epoch": 0.6176370596364503, "grad_norm": 1.5555392503738403, "learning_rate": 3.3684609408472153e-06, "loss": 0.7465, "step": 17486 }, { "epoch": 0.6176723814401582, "grad_norm": 1.7655199766159058, "learning_rate": 3.3679202514268583e-06, "loss": 0.762, "step": 17487 }, { "epoch": 0.6177077032438661, "grad_norm": 1.6003000736236572, "learning_rate": 3.3673795833668554e-06, "loss": 0.784, "step": 17488 }, { "epoch": 0.6177430250475741, "grad_norm": 1.7716586589813232, "learning_rate": 3.3668389366742805e-06, "loss": 0.8044, "step": 17489 }, { "epoch": 0.617778346851282, "grad_norm": 1.6522918939590454, "learning_rate": 3.366298311356212e-06, "loss": 0.7784, "step": 17490 }, { "epoch": 0.6178136686549899, "grad_norm": 1.782967448234558, "learning_rate": 3.3657577074197235e-06, "loss": 0.7793, "step": 17491 }, { "epoch": 0.6178489904586978, "grad_norm": 1.7274786233901978, "learning_rate": 3.3652171248718902e-06, "loss": 0.7705, "step": 17492 }, { "epoch": 0.6178843122624057, "grad_norm": 1.5969845056533813, "learning_rate": 3.3646765637197886e-06, "loss": 0.7708, "step": 17493 }, { "epoch": 0.6179196340661136, "grad_norm": 0.9304611682891846, "learning_rate": 3.3641360239704935e-06, "loss": 0.5561, "step": 17494 }, { "epoch": 0.6179549558698215, "grad_norm": 1.7000114917755127, "learning_rate": 3.3635955056310764e-06, "loss": 0.7812, "step": 17495 }, { "epoch": 0.6179902776735294, "grad_norm": 1.7708810567855835, "learning_rate": 3.3630550087086154e-06, "loss": 0.7565, "step": 17496 }, { "epoch": 0.6180255994772373, "grad_norm": 1.7410751581192017, "learning_rate": 3.362514533210183e-06, "loss": 0.7608, "step": 17497 }, { "epoch": 0.6180609212809453, "grad_norm": 1.5186899900436401, "learning_rate": 3.361974079142851e-06, "loss": 0.7767, "step": 17498 }, { "epoch": 0.6180962430846532, "grad_norm": 1.5881016254425049, "learning_rate": 3.361433646513697e-06, "loss": 0.7844, "step": 17499 }, { "epoch": 0.6181315648883611, "grad_norm": 1.5803452730178833, "learning_rate": 3.36089323532979e-06, "loss": 0.7647, "step": 17500 }, { "epoch": 0.618166886692069, "grad_norm": 1.594382405281067, "learning_rate": 3.3603528455982043e-06, "loss": 0.7907, "step": 17501 }, { "epoch": 0.6182022084957768, "grad_norm": 1.7464929819107056, "learning_rate": 3.359812477326011e-06, "loss": 0.8172, "step": 17502 }, { "epoch": 0.6182375302994847, "grad_norm": 1.8387352228164673, "learning_rate": 3.359272130520286e-06, "loss": 0.7726, "step": 17503 }, { "epoch": 0.6182728521031926, "grad_norm": 1.8308662176132202, "learning_rate": 3.3587318051880974e-06, "loss": 0.777, "step": 17504 }, { "epoch": 0.6183081739069005, "grad_norm": 1.6042065620422363, "learning_rate": 3.3581915013365176e-06, "loss": 0.7987, "step": 17505 }, { "epoch": 0.6183434957106084, "grad_norm": 1.6415276527404785, "learning_rate": 3.3576512189726207e-06, "loss": 0.7913, "step": 17506 }, { "epoch": 0.6183788175143163, "grad_norm": 1.6423571109771729, "learning_rate": 3.357110958103475e-06, "loss": 0.7929, "step": 17507 }, { "epoch": 0.6184141393180242, "grad_norm": 1.6876380443572998, "learning_rate": 3.3565707187361517e-06, "loss": 0.783, "step": 17508 }, { "epoch": 0.6184494611217322, "grad_norm": 1.9621782302856445, "learning_rate": 3.356030500877723e-06, "loss": 0.8001, "step": 17509 }, { "epoch": 0.6184847829254401, "grad_norm": 3.6855270862579346, "learning_rate": 3.3554903045352578e-06, "loss": 0.8037, "step": 17510 }, { "epoch": 0.618520104729148, "grad_norm": 1.6729010343551636, "learning_rate": 3.354950129715825e-06, "loss": 0.8185, "step": 17511 }, { "epoch": 0.6185554265328559, "grad_norm": 1.746541976928711, "learning_rate": 3.354409976426497e-06, "loss": 0.757, "step": 17512 }, { "epoch": 0.6185907483365638, "grad_norm": 1.557140588760376, "learning_rate": 3.353869844674341e-06, "loss": 0.7579, "step": 17513 }, { "epoch": 0.6186260701402717, "grad_norm": 1.9086596965789795, "learning_rate": 3.3533297344664282e-06, "loss": 0.7973, "step": 17514 }, { "epoch": 0.6186613919439796, "grad_norm": 1.6802235841751099, "learning_rate": 3.3527896458098265e-06, "loss": 0.7898, "step": 17515 }, { "epoch": 0.6186967137476875, "grad_norm": 1.6609374284744263, "learning_rate": 3.352249578711603e-06, "loss": 0.7633, "step": 17516 }, { "epoch": 0.6187320355513954, "grad_norm": 1.5950913429260254, "learning_rate": 3.3517095331788296e-06, "loss": 0.7551, "step": 17517 }, { "epoch": 0.6187673573551034, "grad_norm": 1.7431052923202515, "learning_rate": 3.3511695092185697e-06, "loss": 0.7679, "step": 17518 }, { "epoch": 0.6188026791588113, "grad_norm": 1.7758526802062988, "learning_rate": 3.3506295068378946e-06, "loss": 0.7861, "step": 17519 }, { "epoch": 0.6188380009625192, "grad_norm": 1.8286218643188477, "learning_rate": 3.3500895260438698e-06, "loss": 0.7964, "step": 17520 }, { "epoch": 0.6188733227662271, "grad_norm": 1.6847974061965942, "learning_rate": 3.3495495668435625e-06, "loss": 0.7719, "step": 17521 }, { "epoch": 0.618908644569935, "grad_norm": 1.7201268672943115, "learning_rate": 3.349009629244041e-06, "loss": 0.792, "step": 17522 }, { "epoch": 0.6189439663736429, "grad_norm": 1.6947318315505981, "learning_rate": 3.3484697132523704e-06, "loss": 0.7736, "step": 17523 }, { "epoch": 0.6189792881773508, "grad_norm": 0.896109402179718, "learning_rate": 3.3479298188756183e-06, "loss": 0.6032, "step": 17524 }, { "epoch": 0.6190146099810587, "grad_norm": 1.717389464378357, "learning_rate": 3.3473899461208505e-06, "loss": 0.7828, "step": 17525 }, { "epoch": 0.6190499317847666, "grad_norm": 1.7843023538589478, "learning_rate": 3.346850094995131e-06, "loss": 0.7851, "step": 17526 }, { "epoch": 0.6190852535884745, "grad_norm": 1.832734227180481, "learning_rate": 3.346310265505528e-06, "loss": 0.8192, "step": 17527 }, { "epoch": 0.6191205753921823, "grad_norm": 1.8365488052368164, "learning_rate": 3.3457704576591057e-06, "loss": 0.7712, "step": 17528 }, { "epoch": 0.6191558971958903, "grad_norm": 1.6920245885849, "learning_rate": 3.3452306714629274e-06, "loss": 0.7775, "step": 17529 }, { "epoch": 0.6191912189995982, "grad_norm": 1.7328273057937622, "learning_rate": 3.3446909069240598e-06, "loss": 0.8021, "step": 17530 }, { "epoch": 0.6192265408033061, "grad_norm": 1.6889655590057373, "learning_rate": 3.344151164049567e-06, "loss": 0.7851, "step": 17531 }, { "epoch": 0.619261862607014, "grad_norm": 1.5540118217468262, "learning_rate": 3.343611442846511e-06, "loss": 0.8107, "step": 17532 }, { "epoch": 0.6192971844107219, "grad_norm": 1.676108956336975, "learning_rate": 3.3430717433219584e-06, "loss": 0.7727, "step": 17533 }, { "epoch": 0.6193325062144298, "grad_norm": 1.6514859199523926, "learning_rate": 3.3425320654829713e-06, "loss": 0.7901, "step": 17534 }, { "epoch": 0.6193678280181377, "grad_norm": 1.6013901233673096, "learning_rate": 3.341992409336612e-06, "loss": 0.7696, "step": 17535 }, { "epoch": 0.6194031498218456, "grad_norm": 1.6533035039901733, "learning_rate": 3.3414527748899463e-06, "loss": 0.7358, "step": 17536 }, { "epoch": 0.6194384716255535, "grad_norm": 1.7739677429199219, "learning_rate": 3.3409131621500334e-06, "loss": 0.7649, "step": 17537 }, { "epoch": 0.6194737934292615, "grad_norm": 1.6926850080490112, "learning_rate": 3.3403735711239376e-06, "loss": 0.7431, "step": 17538 }, { "epoch": 0.6195091152329694, "grad_norm": 1.8073742389678955, "learning_rate": 3.3398340018187198e-06, "loss": 0.7902, "step": 17539 }, { "epoch": 0.6195444370366773, "grad_norm": 1.5834236145019531, "learning_rate": 3.339294454241443e-06, "loss": 0.7865, "step": 17540 }, { "epoch": 0.6195797588403852, "grad_norm": 1.7349145412445068, "learning_rate": 3.338754928399168e-06, "loss": 0.7957, "step": 17541 }, { "epoch": 0.6196150806440931, "grad_norm": 1.8127607107162476, "learning_rate": 3.338215424298956e-06, "loss": 0.762, "step": 17542 }, { "epoch": 0.619650402447801, "grad_norm": 1.6148841381072998, "learning_rate": 3.337675941947869e-06, "loss": 0.7654, "step": 17543 }, { "epoch": 0.6196857242515089, "grad_norm": 1.6793102025985718, "learning_rate": 3.3371364813529665e-06, "loss": 0.8013, "step": 17544 }, { "epoch": 0.6197210460552168, "grad_norm": 1.8577457666397095, "learning_rate": 3.3365970425213077e-06, "loss": 0.8307, "step": 17545 }, { "epoch": 0.6197563678589247, "grad_norm": 1.6998180150985718, "learning_rate": 3.3360576254599563e-06, "loss": 0.7907, "step": 17546 }, { "epoch": 0.6197916896626326, "grad_norm": 2.1589274406433105, "learning_rate": 3.335518230175968e-06, "loss": 0.7952, "step": 17547 }, { "epoch": 0.6198270114663406, "grad_norm": 2.2343525886535645, "learning_rate": 3.3349788566764056e-06, "loss": 0.7931, "step": 17548 }, { "epoch": 0.6198623332700485, "grad_norm": 1.7480276823043823, "learning_rate": 3.334439504968327e-06, "loss": 0.8057, "step": 17549 }, { "epoch": 0.6198976550737564, "grad_norm": 1.7716561555862427, "learning_rate": 3.3339001750587894e-06, "loss": 0.7721, "step": 17550 }, { "epoch": 0.6199329768774643, "grad_norm": 1.7285841703414917, "learning_rate": 3.333360866954855e-06, "loss": 0.8191, "step": 17551 }, { "epoch": 0.6199682986811722, "grad_norm": 2.0504815578460693, "learning_rate": 3.33282158066358e-06, "loss": 0.7941, "step": 17552 }, { "epoch": 0.6200036204848801, "grad_norm": 1.8421622514724731, "learning_rate": 3.3322823161920216e-06, "loss": 0.8185, "step": 17553 }, { "epoch": 0.6200389422885879, "grad_norm": 1.6436187028884888, "learning_rate": 3.3317430735472422e-06, "loss": 0.7776, "step": 17554 }, { "epoch": 0.6200742640922958, "grad_norm": 1.7153587341308594, "learning_rate": 3.3312038527362923e-06, "loss": 0.7651, "step": 17555 }, { "epoch": 0.6201095858960037, "grad_norm": 1.7560334205627441, "learning_rate": 3.3306646537662333e-06, "loss": 0.7543, "step": 17556 }, { "epoch": 0.6201449076997116, "grad_norm": 1.7739813327789307, "learning_rate": 3.3301254766441217e-06, "loss": 0.776, "step": 17557 }, { "epoch": 0.6201802295034196, "grad_norm": 1.485385537147522, "learning_rate": 3.329586321377014e-06, "loss": 0.7766, "step": 17558 }, { "epoch": 0.6202155513071275, "grad_norm": 1.7388204336166382, "learning_rate": 3.3290471879719665e-06, "loss": 0.7839, "step": 17559 }, { "epoch": 0.6202508731108354, "grad_norm": 1.7657575607299805, "learning_rate": 3.3285080764360344e-06, "loss": 0.7534, "step": 17560 }, { "epoch": 0.6202861949145433, "grad_norm": 1.60654616355896, "learning_rate": 3.327968986776276e-06, "loss": 0.7767, "step": 17561 }, { "epoch": 0.6203215167182512, "grad_norm": 1.721502661705017, "learning_rate": 3.327429918999745e-06, "loss": 0.797, "step": 17562 }, { "epoch": 0.6203568385219591, "grad_norm": 1.6003540754318237, "learning_rate": 3.3268908731134953e-06, "loss": 0.7817, "step": 17563 }, { "epoch": 0.620392160325667, "grad_norm": 2.416788339614868, "learning_rate": 3.326351849124585e-06, "loss": 0.8006, "step": 17564 }, { "epoch": 0.6204274821293749, "grad_norm": 1.6106736660003662, "learning_rate": 3.3258128470400665e-06, "loss": 0.7597, "step": 17565 }, { "epoch": 0.6204628039330828, "grad_norm": 1.7968592643737793, "learning_rate": 3.3252738668669937e-06, "loss": 0.7818, "step": 17566 }, { "epoch": 0.6204981257367908, "grad_norm": 3.5927953720092773, "learning_rate": 3.3247349086124236e-06, "loss": 0.7754, "step": 17567 }, { "epoch": 0.6205334475404987, "grad_norm": 2.013167142868042, "learning_rate": 3.324195972283407e-06, "loss": 0.8066, "step": 17568 }, { "epoch": 0.6205687693442066, "grad_norm": 1.66959547996521, "learning_rate": 3.323657057886998e-06, "loss": 0.8022, "step": 17569 }, { "epoch": 0.6206040911479145, "grad_norm": 1.6496394872665405, "learning_rate": 3.3231181654302514e-06, "loss": 0.7727, "step": 17570 }, { "epoch": 0.6206394129516224, "grad_norm": 1.85224187374115, "learning_rate": 3.322579294920218e-06, "loss": 0.7614, "step": 17571 }, { "epoch": 0.6206747347553303, "grad_norm": 1.6482144594192505, "learning_rate": 3.3220404463639524e-06, "loss": 0.8033, "step": 17572 }, { "epoch": 0.6207100565590382, "grad_norm": 1.7054857015609741, "learning_rate": 3.321501619768507e-06, "loss": 0.8074, "step": 17573 }, { "epoch": 0.6207453783627461, "grad_norm": 1.4771418571472168, "learning_rate": 3.3209628151409323e-06, "loss": 0.7857, "step": 17574 }, { "epoch": 0.620780700166454, "grad_norm": 2.688469409942627, "learning_rate": 3.3204240324882797e-06, "loss": 0.8014, "step": 17575 }, { "epoch": 0.620816021970162, "grad_norm": 1.8238234519958496, "learning_rate": 3.3198852718176013e-06, "loss": 0.8048, "step": 17576 }, { "epoch": 0.6208513437738699, "grad_norm": 1.836592674255371, "learning_rate": 3.31934653313595e-06, "loss": 0.7693, "step": 17577 }, { "epoch": 0.6208866655775778, "grad_norm": 1.6661041975021362, "learning_rate": 3.3188078164503745e-06, "loss": 0.7814, "step": 17578 }, { "epoch": 0.6209219873812857, "grad_norm": 3.2476565837860107, "learning_rate": 3.3182691217679257e-06, "loss": 0.7596, "step": 17579 }, { "epoch": 0.6209573091849935, "grad_norm": 0.9434905052185059, "learning_rate": 3.3177304490956553e-06, "loss": 0.5585, "step": 17580 }, { "epoch": 0.6209926309887014, "grad_norm": 1.751102328300476, "learning_rate": 3.317191798440612e-06, "loss": 0.7924, "step": 17581 }, { "epoch": 0.6210279527924093, "grad_norm": 1.6358222961425781, "learning_rate": 3.316653169809847e-06, "loss": 0.7849, "step": 17582 }, { "epoch": 0.6210632745961172, "grad_norm": 1.6746203899383545, "learning_rate": 3.3161145632104084e-06, "loss": 0.7937, "step": 17583 }, { "epoch": 0.6210985963998251, "grad_norm": 2.8035991191864014, "learning_rate": 3.3155759786493447e-06, "loss": 0.8082, "step": 17584 }, { "epoch": 0.621133918203533, "grad_norm": 1.7114055156707764, "learning_rate": 3.3150374161337073e-06, "loss": 0.7851, "step": 17585 }, { "epoch": 0.6211692400072409, "grad_norm": 1.7972906827926636, "learning_rate": 3.3144988756705444e-06, "loss": 0.8045, "step": 17586 }, { "epoch": 0.6212045618109489, "grad_norm": 1.6512210369110107, "learning_rate": 3.313960357266901e-06, "loss": 0.7915, "step": 17587 }, { "epoch": 0.6212398836146568, "grad_norm": 1.812638759613037, "learning_rate": 3.313421860929829e-06, "loss": 0.7763, "step": 17588 }, { "epoch": 0.6212752054183647, "grad_norm": 1.9013844728469849, "learning_rate": 3.3128833866663745e-06, "loss": 0.8055, "step": 17589 }, { "epoch": 0.6213105272220726, "grad_norm": 1.605637550354004, "learning_rate": 3.3123449344835844e-06, "loss": 0.7915, "step": 17590 }, { "epoch": 0.6213458490257805, "grad_norm": 2.3409054279327393, "learning_rate": 3.3118065043885083e-06, "loss": 0.7913, "step": 17591 }, { "epoch": 0.6213811708294884, "grad_norm": 1.6287331581115723, "learning_rate": 3.3112680963881904e-06, "loss": 0.7932, "step": 17592 }, { "epoch": 0.6214164926331963, "grad_norm": 1.5756741762161255, "learning_rate": 3.3107297104896785e-06, "loss": 0.7901, "step": 17593 }, { "epoch": 0.6214518144369042, "grad_norm": 1.660125732421875, "learning_rate": 3.3101913467000175e-06, "loss": 0.7625, "step": 17594 }, { "epoch": 0.6214871362406121, "grad_norm": 1.9405276775360107, "learning_rate": 3.3096530050262554e-06, "loss": 0.7772, "step": 17595 }, { "epoch": 0.62152245804432, "grad_norm": 1.7378356456756592, "learning_rate": 3.309114685475437e-06, "loss": 0.7774, "step": 17596 }, { "epoch": 0.621557779848028, "grad_norm": 1.77958083152771, "learning_rate": 3.308576388054607e-06, "loss": 0.7784, "step": 17597 }, { "epoch": 0.6215931016517359, "grad_norm": 1.6394003629684448, "learning_rate": 3.3080381127708115e-06, "loss": 0.7443, "step": 17598 }, { "epoch": 0.6216284234554438, "grad_norm": 2.087758779525757, "learning_rate": 3.307499859631096e-06, "loss": 0.7876, "step": 17599 }, { "epoch": 0.6216637452591517, "grad_norm": 2.0396742820739746, "learning_rate": 3.306961628642503e-06, "loss": 0.7991, "step": 17600 }, { "epoch": 0.6216990670628596, "grad_norm": 1.8471826314926147, "learning_rate": 3.306423419812078e-06, "loss": 0.7936, "step": 17601 }, { "epoch": 0.6217343888665675, "grad_norm": 1.5971431732177734, "learning_rate": 3.305885233146866e-06, "loss": 0.7615, "step": 17602 }, { "epoch": 0.6217697106702754, "grad_norm": 1.6014841794967651, "learning_rate": 3.305347068653908e-06, "loss": 0.7734, "step": 17603 }, { "epoch": 0.6218050324739833, "grad_norm": 1.6420857906341553, "learning_rate": 3.3048089263402507e-06, "loss": 0.7748, "step": 17604 }, { "epoch": 0.6218403542776912, "grad_norm": 1.7125340700149536, "learning_rate": 3.304270806212934e-06, "loss": 0.7772, "step": 17605 }, { "epoch": 0.621875676081399, "grad_norm": 1.912716269493103, "learning_rate": 3.303732708279003e-06, "loss": 0.7945, "step": 17606 }, { "epoch": 0.621910997885107, "grad_norm": 1.6557120084762573, "learning_rate": 3.3031946325455e-06, "loss": 0.7735, "step": 17607 }, { "epoch": 0.6219463196888149, "grad_norm": 1.7536189556121826, "learning_rate": 3.3026565790194655e-06, "loss": 0.7635, "step": 17608 }, { "epoch": 0.6219816414925228, "grad_norm": 1.7004319429397583, "learning_rate": 3.3021185477079435e-06, "loss": 0.8131, "step": 17609 }, { "epoch": 0.6220169632962307, "grad_norm": 1.6166627407073975, "learning_rate": 3.301580538617976e-06, "loss": 0.8106, "step": 17610 }, { "epoch": 0.6220522850999386, "grad_norm": 1.800414800643921, "learning_rate": 3.3010425517566026e-06, "loss": 0.8212, "step": 17611 }, { "epoch": 0.6220876069036465, "grad_norm": 1.612633228302002, "learning_rate": 3.300504587130864e-06, "loss": 0.7805, "step": 17612 }, { "epoch": 0.6221229287073544, "grad_norm": 1.584995985031128, "learning_rate": 3.2999666447478012e-06, "loss": 0.7828, "step": 17613 }, { "epoch": 0.6221582505110623, "grad_norm": 1.51749849319458, "learning_rate": 3.2994287246144565e-06, "loss": 0.7443, "step": 17614 }, { "epoch": 0.6221935723147702, "grad_norm": 1.7723208665847778, "learning_rate": 3.2988908267378676e-06, "loss": 0.7999, "step": 17615 }, { "epoch": 0.6222288941184781, "grad_norm": 1.6639697551727295, "learning_rate": 3.2983529511250773e-06, "loss": 0.7644, "step": 17616 }, { "epoch": 0.6222642159221861, "grad_norm": 1.6218745708465576, "learning_rate": 3.2978150977831235e-06, "loss": 0.8101, "step": 17617 }, { "epoch": 0.622299537725894, "grad_norm": 1.800189733505249, "learning_rate": 3.2972772667190447e-06, "loss": 0.822, "step": 17618 }, { "epoch": 0.6223348595296019, "grad_norm": 1.6716628074645996, "learning_rate": 3.2967394579398813e-06, "loss": 0.7969, "step": 17619 }, { "epoch": 0.6223701813333098, "grad_norm": 1.6501597166061401, "learning_rate": 3.2962016714526727e-06, "loss": 0.7536, "step": 17620 }, { "epoch": 0.6224055031370177, "grad_norm": 1.642638921737671, "learning_rate": 3.2956639072644552e-06, "loss": 0.7858, "step": 17621 }, { "epoch": 0.6224408249407256, "grad_norm": 1.9330685138702393, "learning_rate": 3.2951261653822685e-06, "loss": 0.763, "step": 17622 }, { "epoch": 0.6224761467444335, "grad_norm": 1.6749153137207031, "learning_rate": 3.2945884458131505e-06, "loss": 0.8057, "step": 17623 }, { "epoch": 0.6225114685481414, "grad_norm": 1.6147855520248413, "learning_rate": 3.2940507485641375e-06, "loss": 0.7534, "step": 17624 }, { "epoch": 0.6225467903518493, "grad_norm": 1.6494146585464478, "learning_rate": 3.2935130736422684e-06, "loss": 0.7559, "step": 17625 }, { "epoch": 0.6225821121555573, "grad_norm": 1.8004405498504639, "learning_rate": 3.2929754210545796e-06, "loss": 0.7701, "step": 17626 }, { "epoch": 0.6226174339592652, "grad_norm": 1.7648791074752808, "learning_rate": 3.2924377908081064e-06, "loss": 0.7374, "step": 17627 }, { "epoch": 0.6226527557629731, "grad_norm": 1.5369834899902344, "learning_rate": 3.2919001829098885e-06, "loss": 0.7977, "step": 17628 }, { "epoch": 0.622688077566681, "grad_norm": 1.6784125566482544, "learning_rate": 3.2913625973669593e-06, "loss": 0.8246, "step": 17629 }, { "epoch": 0.6227233993703889, "grad_norm": 1.7887029647827148, "learning_rate": 3.2908250341863547e-06, "loss": 0.7579, "step": 17630 }, { "epoch": 0.6227587211740968, "grad_norm": 1.6639750003814697, "learning_rate": 3.2902874933751093e-06, "loss": 0.7698, "step": 17631 }, { "epoch": 0.6227940429778046, "grad_norm": 1.674561619758606, "learning_rate": 3.289749974940262e-06, "loss": 0.8045, "step": 17632 }, { "epoch": 0.6228293647815125, "grad_norm": 1.9585225582122803, "learning_rate": 3.2892124788888446e-06, "loss": 0.8016, "step": 17633 }, { "epoch": 0.6228646865852204, "grad_norm": 1.9759191274642944, "learning_rate": 3.288675005227892e-06, "loss": 0.7927, "step": 17634 }, { "epoch": 0.6229000083889283, "grad_norm": 1.5321131944656372, "learning_rate": 3.28813755396444e-06, "loss": 0.7601, "step": 17635 }, { "epoch": 0.6229353301926362, "grad_norm": 1.8406891822814941, "learning_rate": 3.287600125105522e-06, "loss": 0.8165, "step": 17636 }, { "epoch": 0.6229706519963442, "grad_norm": 1.504287600517273, "learning_rate": 3.287062718658171e-06, "loss": 0.7968, "step": 17637 }, { "epoch": 0.6230059738000521, "grad_norm": 1.6919686794281006, "learning_rate": 3.2865253346294226e-06, "loss": 0.7493, "step": 17638 }, { "epoch": 0.62304129560376, "grad_norm": 1.6494501829147339, "learning_rate": 3.285987973026306e-06, "loss": 0.7561, "step": 17639 }, { "epoch": 0.6230766174074679, "grad_norm": 1.7189826965332031, "learning_rate": 3.285450633855859e-06, "loss": 0.7895, "step": 17640 }, { "epoch": 0.6231119392111758, "grad_norm": 1.6985037326812744, "learning_rate": 3.2849133171251114e-06, "loss": 0.8131, "step": 17641 }, { "epoch": 0.6231472610148837, "grad_norm": 1.8734499216079712, "learning_rate": 3.2843760228410943e-06, "loss": 0.7675, "step": 17642 }, { "epoch": 0.6231825828185916, "grad_norm": 1.7998476028442383, "learning_rate": 3.2838387510108427e-06, "loss": 0.7722, "step": 17643 }, { "epoch": 0.6232179046222995, "grad_norm": 1.6208112239837646, "learning_rate": 3.283301501641387e-06, "loss": 0.7673, "step": 17644 }, { "epoch": 0.6232532264260074, "grad_norm": 1.5430169105529785, "learning_rate": 3.2827642747397574e-06, "loss": 0.7693, "step": 17645 }, { "epoch": 0.6232885482297154, "grad_norm": 1.6384875774383545, "learning_rate": 3.282227070312988e-06, "loss": 0.7934, "step": 17646 }, { "epoch": 0.6233238700334233, "grad_norm": 1.6581523418426514, "learning_rate": 3.2816898883681083e-06, "loss": 0.7487, "step": 17647 }, { "epoch": 0.6233591918371312, "grad_norm": 1.5963770151138306, "learning_rate": 3.281152728912147e-06, "loss": 0.8003, "step": 17648 }, { "epoch": 0.6233945136408391, "grad_norm": 1.5817532539367676, "learning_rate": 3.2806155919521344e-06, "loss": 0.7607, "step": 17649 }, { "epoch": 0.623429835444547, "grad_norm": 1.7527871131896973, "learning_rate": 3.2800784774951034e-06, "loss": 0.7699, "step": 17650 }, { "epoch": 0.6234651572482549, "grad_norm": 1.8361479043960571, "learning_rate": 3.2795413855480818e-06, "loss": 0.8215, "step": 17651 }, { "epoch": 0.6235004790519628, "grad_norm": 1.5053669214248657, "learning_rate": 3.2790043161180974e-06, "loss": 0.7731, "step": 17652 }, { "epoch": 0.6235358008556707, "grad_norm": 1.5516477823257446, "learning_rate": 3.2784672692121832e-06, "loss": 0.8062, "step": 17653 }, { "epoch": 0.6235711226593786, "grad_norm": 1.760650396347046, "learning_rate": 3.277930244837365e-06, "loss": 0.7763, "step": 17654 }, { "epoch": 0.6236064444630866, "grad_norm": 1.694061517715454, "learning_rate": 3.277393243000671e-06, "loss": 0.7515, "step": 17655 }, { "epoch": 0.6236417662667945, "grad_norm": 1.717515230178833, "learning_rate": 3.2768562637091315e-06, "loss": 0.7656, "step": 17656 }, { "epoch": 0.6236770880705024, "grad_norm": 1.6640995740890503, "learning_rate": 3.276319306969774e-06, "loss": 0.7669, "step": 17657 }, { "epoch": 0.6237124098742102, "grad_norm": 1.654651403427124, "learning_rate": 3.2757823727896236e-06, "loss": 0.7904, "step": 17658 }, { "epoch": 0.6237477316779181, "grad_norm": 1.5762494802474976, "learning_rate": 3.275245461175711e-06, "loss": 0.7799, "step": 17659 }, { "epoch": 0.623783053481626, "grad_norm": 1.5671231746673584, "learning_rate": 3.2747085721350614e-06, "loss": 0.8112, "step": 17660 }, { "epoch": 0.6238183752853339, "grad_norm": 1.5479539632797241, "learning_rate": 3.2741717056747004e-06, "loss": 0.7682, "step": 17661 }, { "epoch": 0.6238536970890418, "grad_norm": 1.8731706142425537, "learning_rate": 3.2736348618016566e-06, "loss": 0.8253, "step": 17662 }, { "epoch": 0.6238890188927497, "grad_norm": 1.5717204809188843, "learning_rate": 3.2730980405229545e-06, "loss": 0.7848, "step": 17663 }, { "epoch": 0.6239243406964576, "grad_norm": 1.6374125480651855, "learning_rate": 3.2725612418456215e-06, "loss": 0.7615, "step": 17664 }, { "epoch": 0.6239596625001655, "grad_norm": 1.565744400024414, "learning_rate": 3.272024465776683e-06, "loss": 0.7858, "step": 17665 }, { "epoch": 0.6239949843038735, "grad_norm": 1.779905080795288, "learning_rate": 3.2714877123231632e-06, "loss": 0.7678, "step": 17666 }, { "epoch": 0.6240303061075814, "grad_norm": 1.6770827770233154, "learning_rate": 3.270950981492087e-06, "loss": 0.758, "step": 17667 }, { "epoch": 0.6240656279112893, "grad_norm": 1.6760791540145874, "learning_rate": 3.270414273290478e-06, "loss": 0.7907, "step": 17668 }, { "epoch": 0.6241009497149972, "grad_norm": 1.8186357021331787, "learning_rate": 3.269877587725363e-06, "loss": 0.8092, "step": 17669 }, { "epoch": 0.6241362715187051, "grad_norm": 1.62178635597229, "learning_rate": 3.2693409248037656e-06, "loss": 0.767, "step": 17670 }, { "epoch": 0.624171593322413, "grad_norm": 1.6537494659423828, "learning_rate": 3.268804284532707e-06, "loss": 0.7776, "step": 17671 }, { "epoch": 0.6242069151261209, "grad_norm": 1.5290786027908325, "learning_rate": 3.2682676669192136e-06, "loss": 0.7916, "step": 17672 }, { "epoch": 0.6242422369298288, "grad_norm": 1.6709978580474854, "learning_rate": 3.2677310719703065e-06, "loss": 0.7957, "step": 17673 }, { "epoch": 0.6242775587335367, "grad_norm": 1.465382695198059, "learning_rate": 3.2671944996930105e-06, "loss": 0.7472, "step": 17674 }, { "epoch": 0.6243128805372447, "grad_norm": 1.7260369062423706, "learning_rate": 3.2666579500943473e-06, "loss": 0.7787, "step": 17675 }, { "epoch": 0.6243482023409526, "grad_norm": 1.614680290222168, "learning_rate": 3.2661214231813377e-06, "loss": 0.7759, "step": 17676 }, { "epoch": 0.6243835241446605, "grad_norm": 2.3861308097839355, "learning_rate": 3.2655849189610066e-06, "loss": 0.8015, "step": 17677 }, { "epoch": 0.6244188459483684, "grad_norm": 1.6848927736282349, "learning_rate": 3.2650484374403736e-06, "loss": 0.7311, "step": 17678 }, { "epoch": 0.6244541677520763, "grad_norm": 1.4263880252838135, "learning_rate": 3.2645119786264595e-06, "loss": 0.7318, "step": 17679 }, { "epoch": 0.6244894895557842, "grad_norm": 1.6460198163986206, "learning_rate": 3.2639755425262874e-06, "loss": 0.7735, "step": 17680 }, { "epoch": 0.6245248113594921, "grad_norm": 1.7175606489181519, "learning_rate": 3.263439129146877e-06, "loss": 0.8207, "step": 17681 }, { "epoch": 0.6245601331632, "grad_norm": 1.6661503314971924, "learning_rate": 3.2629027384952484e-06, "loss": 0.751, "step": 17682 }, { "epoch": 0.6245954549669079, "grad_norm": 1.6646121740341187, "learning_rate": 3.2623663705784224e-06, "loss": 0.7674, "step": 17683 }, { "epoch": 0.6246307767706157, "grad_norm": 1.6423014402389526, "learning_rate": 3.26183002540342e-06, "loss": 0.7913, "step": 17684 }, { "epoch": 0.6246660985743236, "grad_norm": 1.8648573160171509, "learning_rate": 3.2612937029772586e-06, "loss": 0.7806, "step": 17685 }, { "epoch": 0.6247014203780316, "grad_norm": 1.6196759939193726, "learning_rate": 3.260757403306958e-06, "loss": 0.8175, "step": 17686 }, { "epoch": 0.6247367421817395, "grad_norm": 1.5712225437164307, "learning_rate": 3.2602211263995377e-06, "loss": 0.7998, "step": 17687 }, { "epoch": 0.6247720639854474, "grad_norm": 1.6963449716567993, "learning_rate": 3.259684872262016e-06, "loss": 0.7745, "step": 17688 }, { "epoch": 0.6248073857891553, "grad_norm": 1.835084319114685, "learning_rate": 3.2591486409014113e-06, "loss": 0.7907, "step": 17689 }, { "epoch": 0.6248427075928632, "grad_norm": 1.5601428747177124, "learning_rate": 3.2586124323247425e-06, "loss": 0.7334, "step": 17690 }, { "epoch": 0.6248780293965711, "grad_norm": 1.7172951698303223, "learning_rate": 3.258076246539027e-06, "loss": 0.7659, "step": 17691 }, { "epoch": 0.624913351200279, "grad_norm": 1.4660378694534302, "learning_rate": 3.2575400835512816e-06, "loss": 0.7571, "step": 17692 }, { "epoch": 0.6249486730039869, "grad_norm": 1.9956697225570679, "learning_rate": 3.257003943368524e-06, "loss": 0.8266, "step": 17693 }, { "epoch": 0.6249839948076948, "grad_norm": 1.546312689781189, "learning_rate": 3.2564678259977723e-06, "loss": 0.7558, "step": 17694 }, { "epoch": 0.6250193166114028, "grad_norm": 1.6519131660461426, "learning_rate": 3.2559317314460404e-06, "loss": 0.7912, "step": 17695 }, { "epoch": 0.6250546384151107, "grad_norm": 1.8915143013000488, "learning_rate": 3.2553956597203467e-06, "loss": 0.7926, "step": 17696 }, { "epoch": 0.6250899602188186, "grad_norm": 1.5557488203048706, "learning_rate": 3.2548596108277064e-06, "loss": 0.7866, "step": 17697 }, { "epoch": 0.6251252820225265, "grad_norm": 1.6580795049667358, "learning_rate": 3.254323584775136e-06, "loss": 0.7935, "step": 17698 }, { "epoch": 0.6251606038262344, "grad_norm": 1.5363750457763672, "learning_rate": 3.2537875815696507e-06, "loss": 0.7687, "step": 17699 }, { "epoch": 0.6251959256299423, "grad_norm": 1.5716400146484375, "learning_rate": 3.2532516012182634e-06, "loss": 0.7793, "step": 17700 }, { "epoch": 0.6252312474336502, "grad_norm": 1.6429882049560547, "learning_rate": 3.252715643727993e-06, "loss": 0.7962, "step": 17701 }, { "epoch": 0.6252665692373581, "grad_norm": 1.6216524839401245, "learning_rate": 3.252179709105852e-06, "loss": 0.7814, "step": 17702 }, { "epoch": 0.625301891041066, "grad_norm": 1.9633774757385254, "learning_rate": 3.251643797358854e-06, "loss": 0.7611, "step": 17703 }, { "epoch": 0.625337212844774, "grad_norm": 1.6763697862625122, "learning_rate": 3.251107908494013e-06, "loss": 0.7811, "step": 17704 }, { "epoch": 0.6253725346484819, "grad_norm": 1.6984338760375977, "learning_rate": 3.2505720425183417e-06, "loss": 0.8251, "step": 17705 }, { "epoch": 0.6254078564521898, "grad_norm": 1.5498894453048706, "learning_rate": 3.250036199438856e-06, "loss": 0.7758, "step": 17706 }, { "epoch": 0.6254431782558977, "grad_norm": 1.662227749824524, "learning_rate": 3.249500379262567e-06, "loss": 0.7806, "step": 17707 }, { "epoch": 0.6254785000596056, "grad_norm": 1.7662144899368286, "learning_rate": 3.2489645819964876e-06, "loss": 0.7909, "step": 17708 }, { "epoch": 0.6255138218633135, "grad_norm": 1.6525242328643799, "learning_rate": 3.2484288076476317e-06, "loss": 0.7875, "step": 17709 }, { "epoch": 0.6255491436670213, "grad_norm": 1.7286796569824219, "learning_rate": 3.2478930562230095e-06, "loss": 0.771, "step": 17710 }, { "epoch": 0.6255844654707292, "grad_norm": 1.823657512664795, "learning_rate": 3.2473573277296337e-06, "loss": 0.7974, "step": 17711 }, { "epoch": 0.6256197872744371, "grad_norm": 1.6098464727401733, "learning_rate": 3.246821622174516e-06, "loss": 0.7863, "step": 17712 }, { "epoch": 0.625655109078145, "grad_norm": 1.5631695985794067, "learning_rate": 3.2462859395646665e-06, "loss": 0.7556, "step": 17713 }, { "epoch": 0.6256904308818529, "grad_norm": 2.214841842651367, "learning_rate": 3.2457502799070982e-06, "loss": 0.7877, "step": 17714 }, { "epoch": 0.6257257526855609, "grad_norm": 1.833460807800293, "learning_rate": 3.2452146432088204e-06, "loss": 0.8018, "step": 17715 }, { "epoch": 0.6257610744892688, "grad_norm": 1.9454251527786255, "learning_rate": 3.244679029476843e-06, "loss": 0.7647, "step": 17716 }, { "epoch": 0.6257963962929767, "grad_norm": 1.5049703121185303, "learning_rate": 3.2441434387181766e-06, "loss": 0.7485, "step": 17717 }, { "epoch": 0.6258317180966846, "grad_norm": 1.6059378385543823, "learning_rate": 3.243607870939831e-06, "loss": 0.7614, "step": 17718 }, { "epoch": 0.6258670399003925, "grad_norm": 1.899857521057129, "learning_rate": 3.243072326148814e-06, "loss": 0.7672, "step": 17719 }, { "epoch": 0.6259023617041004, "grad_norm": 1.8553853034973145, "learning_rate": 3.2425368043521387e-06, "loss": 0.8088, "step": 17720 }, { "epoch": 0.6259376835078083, "grad_norm": 1.8144798278808594, "learning_rate": 3.2420013055568107e-06, "loss": 0.7907, "step": 17721 }, { "epoch": 0.6259730053115162, "grad_norm": 1.5595695972442627, "learning_rate": 3.241465829769839e-06, "loss": 0.8104, "step": 17722 }, { "epoch": 0.6260083271152241, "grad_norm": 1.6707123517990112, "learning_rate": 3.2409303769982303e-06, "loss": 0.7868, "step": 17723 }, { "epoch": 0.626043648918932, "grad_norm": 1.7539966106414795, "learning_rate": 3.2403949472489954e-06, "loss": 0.7842, "step": 17724 }, { "epoch": 0.62607897072264, "grad_norm": 1.7537955045700073, "learning_rate": 3.239859540529141e-06, "loss": 0.8065, "step": 17725 }, { "epoch": 0.6261142925263479, "grad_norm": 1.7883366346359253, "learning_rate": 3.239324156845672e-06, "loss": 0.7966, "step": 17726 }, { "epoch": 0.6261496143300558, "grad_norm": 1.9157116413116455, "learning_rate": 3.2387887962055997e-06, "loss": 0.8235, "step": 17727 }, { "epoch": 0.6261849361337637, "grad_norm": 1.6422193050384521, "learning_rate": 3.238253458615929e-06, "loss": 0.7837, "step": 17728 }, { "epoch": 0.6262202579374716, "grad_norm": 1.6526859998703003, "learning_rate": 3.2377181440836637e-06, "loss": 0.7889, "step": 17729 }, { "epoch": 0.6262555797411795, "grad_norm": 1.6727710962295532, "learning_rate": 3.2371828526158134e-06, "loss": 0.7748, "step": 17730 }, { "epoch": 0.6262909015448874, "grad_norm": 1.7508212327957153, "learning_rate": 3.236647584219381e-06, "loss": 0.7826, "step": 17731 }, { "epoch": 0.6263262233485953, "grad_norm": 1.5665032863616943, "learning_rate": 3.236112338901375e-06, "loss": 0.7816, "step": 17732 }, { "epoch": 0.6263615451523032, "grad_norm": 3.0656790733337402, "learning_rate": 3.2355771166688e-06, "loss": 0.8082, "step": 17733 }, { "epoch": 0.6263968669560112, "grad_norm": 1.7444484233856201, "learning_rate": 3.2350419175286573e-06, "loss": 0.8007, "step": 17734 }, { "epoch": 0.6264321887597191, "grad_norm": 2.2105422019958496, "learning_rate": 3.2345067414879565e-06, "loss": 0.7748, "step": 17735 }, { "epoch": 0.6264675105634269, "grad_norm": 1.7668884992599487, "learning_rate": 3.2339715885536992e-06, "loss": 0.7708, "step": 17736 }, { "epoch": 0.6265028323671348, "grad_norm": 1.6631543636322021, "learning_rate": 3.233436458732889e-06, "loss": 0.7637, "step": 17737 }, { "epoch": 0.6265381541708427, "grad_norm": 3.7468326091766357, "learning_rate": 3.2329013520325314e-06, "loss": 0.8045, "step": 17738 }, { "epoch": 0.6265734759745506, "grad_norm": 1.7375596761703491, "learning_rate": 3.232366268459629e-06, "loss": 0.7969, "step": 17739 }, { "epoch": 0.6266087977782585, "grad_norm": 1.6328946352005005, "learning_rate": 3.2318312080211845e-06, "loss": 0.7606, "step": 17740 }, { "epoch": 0.6266441195819664, "grad_norm": 1.709672212600708, "learning_rate": 3.231296170724199e-06, "loss": 0.7414, "step": 17741 }, { "epoch": 0.6266794413856743, "grad_norm": 1.6746296882629395, "learning_rate": 3.230761156575678e-06, "loss": 0.811, "step": 17742 }, { "epoch": 0.6267147631893822, "grad_norm": 1.5196866989135742, "learning_rate": 3.2302261655826227e-06, "loss": 0.7739, "step": 17743 }, { "epoch": 0.6267500849930902, "grad_norm": 1.6216416358947754, "learning_rate": 3.2296911977520334e-06, "loss": 0.7828, "step": 17744 }, { "epoch": 0.6267854067967981, "grad_norm": 1.7162357568740845, "learning_rate": 3.2291562530909137e-06, "loss": 0.7499, "step": 17745 }, { "epoch": 0.626820728600506, "grad_norm": 1.9325032234191895, "learning_rate": 3.228621331606265e-06, "loss": 0.7792, "step": 17746 }, { "epoch": 0.6268560504042139, "grad_norm": 2.3524720668792725, "learning_rate": 3.2280864333050855e-06, "loss": 0.7449, "step": 17747 }, { "epoch": 0.6268913722079218, "grad_norm": 1.6968010663986206, "learning_rate": 3.227551558194379e-06, "loss": 0.812, "step": 17748 }, { "epoch": 0.6269266940116297, "grad_norm": 1.7194336652755737, "learning_rate": 3.2270167062811442e-06, "loss": 0.7671, "step": 17749 }, { "epoch": 0.6269620158153376, "grad_norm": 1.7252278327941895, "learning_rate": 3.2264818775723806e-06, "loss": 0.7569, "step": 17750 }, { "epoch": 0.6269973376190455, "grad_norm": 1.5729397535324097, "learning_rate": 3.2259470720750894e-06, "loss": 0.7822, "step": 17751 }, { "epoch": 0.6270326594227534, "grad_norm": 1.6212592124938965, "learning_rate": 3.2254122897962703e-06, "loss": 0.7814, "step": 17752 }, { "epoch": 0.6270679812264613, "grad_norm": 1.6583524942398071, "learning_rate": 3.22487753074292e-06, "loss": 0.7592, "step": 17753 }, { "epoch": 0.6271033030301693, "grad_norm": 1.5973718166351318, "learning_rate": 3.224342794922041e-06, "loss": 0.76, "step": 17754 }, { "epoch": 0.6271386248338772, "grad_norm": 1.5992999076843262, "learning_rate": 3.223808082340627e-06, "loss": 0.7464, "step": 17755 }, { "epoch": 0.6271739466375851, "grad_norm": 1.579020380973816, "learning_rate": 3.2232733930056807e-06, "loss": 0.7565, "step": 17756 }, { "epoch": 0.627209268441293, "grad_norm": 1.6243724822998047, "learning_rate": 3.2227387269241995e-06, "loss": 0.7874, "step": 17757 }, { "epoch": 0.6272445902450009, "grad_norm": 1.5637444257736206, "learning_rate": 3.2222040841031786e-06, "loss": 0.7728, "step": 17758 }, { "epoch": 0.6272799120487088, "grad_norm": 1.6341708898544312, "learning_rate": 3.221669464549616e-06, "loss": 0.7703, "step": 17759 }, { "epoch": 0.6273152338524167, "grad_norm": 1.807482123374939, "learning_rate": 3.2211348682705087e-06, "loss": 0.7343, "step": 17760 }, { "epoch": 0.6273505556561246, "grad_norm": 1.690219759941101, "learning_rate": 3.2206002952728543e-06, "loss": 0.7801, "step": 17761 }, { "epoch": 0.6273858774598325, "grad_norm": 1.588555097579956, "learning_rate": 3.2200657455636496e-06, "loss": 0.7654, "step": 17762 }, { "epoch": 0.6274211992635403, "grad_norm": 1.6051088571548462, "learning_rate": 3.2195312191498885e-06, "loss": 0.7863, "step": 17763 }, { "epoch": 0.6274565210672483, "grad_norm": 1.6488550901412964, "learning_rate": 3.2189967160385683e-06, "loss": 0.766, "step": 17764 }, { "epoch": 0.6274918428709562, "grad_norm": 1.7111742496490479, "learning_rate": 3.218462236236684e-06, "loss": 0.7603, "step": 17765 }, { "epoch": 0.6275271646746641, "grad_norm": 1.620396375656128, "learning_rate": 3.217927779751232e-06, "loss": 0.765, "step": 17766 }, { "epoch": 0.627562486478372, "grad_norm": 1.643520474433899, "learning_rate": 3.217393346589206e-06, "loss": 0.7412, "step": 17767 }, { "epoch": 0.6275978082820799, "grad_norm": 1.7176222801208496, "learning_rate": 3.2168589367576004e-06, "loss": 0.7834, "step": 17768 }, { "epoch": 0.6276331300857878, "grad_norm": 1.6352871656417847, "learning_rate": 3.2163245502634107e-06, "loss": 0.7953, "step": 17769 }, { "epoch": 0.6276684518894957, "grad_norm": 1.6985862255096436, "learning_rate": 3.2157901871136293e-06, "loss": 0.7528, "step": 17770 }, { "epoch": 0.6277037736932036, "grad_norm": 1.6923030614852905, "learning_rate": 3.21525584731525e-06, "loss": 0.7624, "step": 17771 }, { "epoch": 0.6277390954969115, "grad_norm": 1.6540687084197998, "learning_rate": 3.214721530875268e-06, "loss": 0.7874, "step": 17772 }, { "epoch": 0.6277744173006194, "grad_norm": 1.6517105102539062, "learning_rate": 3.2141872378006742e-06, "loss": 0.762, "step": 17773 }, { "epoch": 0.6278097391043274, "grad_norm": 1.7719722986221313, "learning_rate": 3.213652968098462e-06, "loss": 0.7855, "step": 17774 }, { "epoch": 0.6278450609080353, "grad_norm": 1.672910451889038, "learning_rate": 3.2131187217756245e-06, "loss": 0.7587, "step": 17775 }, { "epoch": 0.6278803827117432, "grad_norm": 2.3052847385406494, "learning_rate": 3.212584498839154e-06, "loss": 0.7867, "step": 17776 }, { "epoch": 0.6279157045154511, "grad_norm": 1.4921249151229858, "learning_rate": 3.2120502992960413e-06, "loss": 0.7765, "step": 17777 }, { "epoch": 0.627951026319159, "grad_norm": 1.7229433059692383, "learning_rate": 3.2115161231532763e-06, "loss": 0.758, "step": 17778 }, { "epoch": 0.6279863481228669, "grad_norm": 1.773559808731079, "learning_rate": 3.2109819704178535e-06, "loss": 0.7798, "step": 17779 }, { "epoch": 0.6280216699265748, "grad_norm": 1.6748429536819458, "learning_rate": 3.2104478410967634e-06, "loss": 0.7585, "step": 17780 }, { "epoch": 0.6280569917302827, "grad_norm": 1.7832696437835693, "learning_rate": 3.2099137351969935e-06, "loss": 0.7629, "step": 17781 }, { "epoch": 0.6280923135339906, "grad_norm": 1.6319369077682495, "learning_rate": 3.209379652725537e-06, "loss": 0.8163, "step": 17782 }, { "epoch": 0.6281276353376986, "grad_norm": 1.624602198600769, "learning_rate": 3.208845593689383e-06, "loss": 0.7585, "step": 17783 }, { "epoch": 0.6281629571414065, "grad_norm": 1.6250303983688354, "learning_rate": 3.2083115580955204e-06, "loss": 0.7754, "step": 17784 }, { "epoch": 0.6281982789451144, "grad_norm": 1.6545441150665283, "learning_rate": 3.207777545950941e-06, "loss": 0.7614, "step": 17785 }, { "epoch": 0.6282336007488223, "grad_norm": 2.6690380573272705, "learning_rate": 3.207243557262631e-06, "loss": 0.8097, "step": 17786 }, { "epoch": 0.6282689225525302, "grad_norm": 1.6091887950897217, "learning_rate": 3.20670959203758e-06, "loss": 0.7654, "step": 17787 }, { "epoch": 0.6283042443562381, "grad_norm": 1.6015270948410034, "learning_rate": 3.206175650282777e-06, "loss": 0.7789, "step": 17788 }, { "epoch": 0.6283395661599459, "grad_norm": 3.0300042629241943, "learning_rate": 3.20564173200521e-06, "loss": 0.7709, "step": 17789 }, { "epoch": 0.6283748879636538, "grad_norm": 1.6845415830612183, "learning_rate": 3.205107837211867e-06, "loss": 0.7576, "step": 17790 }, { "epoch": 0.6284102097673617, "grad_norm": 1.6980637311935425, "learning_rate": 3.204573965909735e-06, "loss": 0.7885, "step": 17791 }, { "epoch": 0.6284455315710696, "grad_norm": 1.723096489906311, "learning_rate": 3.204040118105801e-06, "loss": 0.7865, "step": 17792 }, { "epoch": 0.6284808533747775, "grad_norm": 1.7945581674575806, "learning_rate": 3.2035062938070536e-06, "loss": 0.7497, "step": 17793 }, { "epoch": 0.6285161751784855, "grad_norm": 1.6196140050888062, "learning_rate": 3.2029724930204784e-06, "loss": 0.7769, "step": 17794 }, { "epoch": 0.6285514969821934, "grad_norm": 1.7389888763427734, "learning_rate": 3.2024387157530614e-06, "loss": 0.7943, "step": 17795 }, { "epoch": 0.6285868187859013, "grad_norm": 1.6442408561706543, "learning_rate": 3.2019049620117875e-06, "loss": 0.7814, "step": 17796 }, { "epoch": 0.6286221405896092, "grad_norm": 1.6270983219146729, "learning_rate": 3.2013712318036427e-06, "loss": 0.785, "step": 17797 }, { "epoch": 0.6286574623933171, "grad_norm": 1.667249321937561, "learning_rate": 3.2008375251356146e-06, "loss": 0.7496, "step": 17798 }, { "epoch": 0.628692784197025, "grad_norm": 1.5696136951446533, "learning_rate": 3.200303842014685e-06, "loss": 0.7798, "step": 17799 }, { "epoch": 0.6287281060007329, "grad_norm": 1.492674469947815, "learning_rate": 3.1997701824478423e-06, "loss": 0.7259, "step": 17800 }, { "epoch": 0.6287634278044408, "grad_norm": 1.7341110706329346, "learning_rate": 3.199236546442068e-06, "loss": 0.7891, "step": 17801 }, { "epoch": 0.6287987496081487, "grad_norm": 1.5983995199203491, "learning_rate": 3.198702934004347e-06, "loss": 0.7739, "step": 17802 }, { "epoch": 0.6288340714118567, "grad_norm": 1.7161871194839478, "learning_rate": 3.198169345141664e-06, "loss": 0.8287, "step": 17803 }, { "epoch": 0.6288693932155646, "grad_norm": 1.5860964059829712, "learning_rate": 3.1976357798610026e-06, "loss": 0.7918, "step": 17804 }, { "epoch": 0.6289047150192725, "grad_norm": 1.6672053337097168, "learning_rate": 3.197102238169344e-06, "loss": 0.737, "step": 17805 }, { "epoch": 0.6289400368229804, "grad_norm": 1.6565319299697876, "learning_rate": 3.196568720073673e-06, "loss": 0.8026, "step": 17806 }, { "epoch": 0.6289753586266883, "grad_norm": 1.844133973121643, "learning_rate": 3.1960352255809716e-06, "loss": 0.8123, "step": 17807 }, { "epoch": 0.6290106804303962, "grad_norm": 1.686265230178833, "learning_rate": 3.1955017546982215e-06, "loss": 0.7961, "step": 17808 }, { "epoch": 0.6290460022341041, "grad_norm": 1.9461355209350586, "learning_rate": 3.1949683074324063e-06, "loss": 0.7932, "step": 17809 }, { "epoch": 0.629081324037812, "grad_norm": 2.6024057865142822, "learning_rate": 3.194434883790506e-06, "loss": 0.773, "step": 17810 }, { "epoch": 0.6291166458415199, "grad_norm": 1.5585368871688843, "learning_rate": 3.193901483779502e-06, "loss": 0.7547, "step": 17811 }, { "epoch": 0.6291519676452278, "grad_norm": 1.6318671703338623, "learning_rate": 3.1933681074063766e-06, "loss": 0.7622, "step": 17812 }, { "epoch": 0.6291872894489358, "grad_norm": 1.6177451610565186, "learning_rate": 3.1928347546781115e-06, "loss": 0.8317, "step": 17813 }, { "epoch": 0.6292226112526437, "grad_norm": 1.6960490942001343, "learning_rate": 3.1923014256016834e-06, "loss": 0.7965, "step": 17814 }, { "epoch": 0.6292579330563515, "grad_norm": 1.6693501472473145, "learning_rate": 3.1917681201840735e-06, "loss": 0.766, "step": 17815 }, { "epoch": 0.6292932548600594, "grad_norm": 1.569926142692566, "learning_rate": 3.1912348384322646e-06, "loss": 0.7933, "step": 17816 }, { "epoch": 0.6293285766637673, "grad_norm": 1.6605030298233032, "learning_rate": 3.1907015803532327e-06, "loss": 0.7824, "step": 17817 }, { "epoch": 0.6293638984674752, "grad_norm": 1.6011193990707397, "learning_rate": 3.1901683459539584e-06, "loss": 0.7647, "step": 17818 }, { "epoch": 0.6293992202711831, "grad_norm": 1.659784197807312, "learning_rate": 3.1896351352414213e-06, "loss": 0.7829, "step": 17819 }, { "epoch": 0.629434542074891, "grad_norm": 1.6753365993499756, "learning_rate": 3.1891019482225994e-06, "loss": 0.7735, "step": 17820 }, { "epoch": 0.6294698638785989, "grad_norm": 1.7687852382659912, "learning_rate": 3.1885687849044695e-06, "loss": 0.7943, "step": 17821 }, { "epoch": 0.6295051856823068, "grad_norm": 1.5257385969161987, "learning_rate": 3.188035645294012e-06, "loss": 0.7658, "step": 17822 }, { "epoch": 0.6295405074860148, "grad_norm": 1.7511134147644043, "learning_rate": 3.187502529398202e-06, "loss": 0.7985, "step": 17823 }, { "epoch": 0.6295758292897227, "grad_norm": 1.9723728895187378, "learning_rate": 3.18696943722402e-06, "loss": 0.7648, "step": 17824 }, { "epoch": 0.6296111510934306, "grad_norm": 1.7426849603652954, "learning_rate": 3.1864363687784405e-06, "loss": 0.801, "step": 17825 }, { "epoch": 0.6296464728971385, "grad_norm": 1.975044846534729, "learning_rate": 3.18590332406844e-06, "loss": 0.7729, "step": 17826 }, { "epoch": 0.6296817947008464, "grad_norm": 1.7532416582107544, "learning_rate": 3.185370303100997e-06, "loss": 0.7798, "step": 17827 }, { "epoch": 0.6297171165045543, "grad_norm": 1.947163701057434, "learning_rate": 3.184837305883086e-06, "loss": 0.7666, "step": 17828 }, { "epoch": 0.6297524383082622, "grad_norm": 1.654720425605774, "learning_rate": 3.184304332421683e-06, "loss": 0.8123, "step": 17829 }, { "epoch": 0.6297877601119701, "grad_norm": 1.8364548683166504, "learning_rate": 3.183771382723764e-06, "loss": 0.7739, "step": 17830 }, { "epoch": 0.629823081915678, "grad_norm": 1.5597037076950073, "learning_rate": 3.1832384567963037e-06, "loss": 0.7439, "step": 17831 }, { "epoch": 0.629858403719386, "grad_norm": 1.6622354984283447, "learning_rate": 3.182705554646277e-06, "loss": 0.7602, "step": 17832 }, { "epoch": 0.6298937255230939, "grad_norm": 1.6079579591751099, "learning_rate": 3.1821726762806578e-06, "loss": 0.761, "step": 17833 }, { "epoch": 0.6299290473268018, "grad_norm": 1.9089806079864502, "learning_rate": 3.1816398217064213e-06, "loss": 0.7851, "step": 17834 }, { "epoch": 0.6299643691305097, "grad_norm": 1.6911991834640503, "learning_rate": 3.1811069909305416e-06, "loss": 0.7776, "step": 17835 }, { "epoch": 0.6299996909342176, "grad_norm": 1.7897700071334839, "learning_rate": 3.1805741839599896e-06, "loss": 0.7755, "step": 17836 }, { "epoch": 0.6300350127379255, "grad_norm": 1.6258913278579712, "learning_rate": 3.180041400801742e-06, "loss": 0.7705, "step": 17837 }, { "epoch": 0.6300703345416334, "grad_norm": 1.6955885887145996, "learning_rate": 3.1795086414627706e-06, "loss": 0.8372, "step": 17838 }, { "epoch": 0.6301056563453413, "grad_norm": 1.5289511680603027, "learning_rate": 3.1789759059500475e-06, "loss": 0.7362, "step": 17839 }, { "epoch": 0.6301409781490492, "grad_norm": 1.5728272199630737, "learning_rate": 3.1784431942705463e-06, "loss": 0.7737, "step": 17840 }, { "epoch": 0.630176299952757, "grad_norm": 1.626989483833313, "learning_rate": 3.1779105064312375e-06, "loss": 0.7548, "step": 17841 }, { "epoch": 0.630211621756465, "grad_norm": 1.6751493215560913, "learning_rate": 3.1773778424390927e-06, "loss": 0.7436, "step": 17842 }, { "epoch": 0.6302469435601729, "grad_norm": 1.6698365211486816, "learning_rate": 3.1768452023010856e-06, "loss": 0.7597, "step": 17843 }, { "epoch": 0.6302822653638808, "grad_norm": 1.662661075592041, "learning_rate": 3.176312586024185e-06, "loss": 0.7667, "step": 17844 }, { "epoch": 0.6303175871675887, "grad_norm": 1.5951744318008423, "learning_rate": 3.1757799936153617e-06, "loss": 0.7861, "step": 17845 }, { "epoch": 0.6303529089712966, "grad_norm": 1.654016137123108, "learning_rate": 3.175247425081588e-06, "loss": 0.7897, "step": 17846 }, { "epoch": 0.6303882307750045, "grad_norm": 1.6815848350524902, "learning_rate": 3.1747148804298324e-06, "loss": 0.7734, "step": 17847 }, { "epoch": 0.6304235525787124, "grad_norm": 1.6695929765701294, "learning_rate": 3.1741823596670652e-06, "loss": 0.8104, "step": 17848 }, { "epoch": 0.6304588743824203, "grad_norm": 1.6552294492721558, "learning_rate": 3.1736498628002566e-06, "loss": 0.7969, "step": 17849 }, { "epoch": 0.6304941961861282, "grad_norm": 1.642236351966858, "learning_rate": 3.1731173898363765e-06, "loss": 0.7964, "step": 17850 }, { "epoch": 0.6305295179898361, "grad_norm": 1.6746056079864502, "learning_rate": 3.1725849407823915e-06, "loss": 0.7785, "step": 17851 }, { "epoch": 0.630564839793544, "grad_norm": 1.7069286108016968, "learning_rate": 3.172052515645271e-06, "loss": 0.7983, "step": 17852 }, { "epoch": 0.630600161597252, "grad_norm": 1.7347590923309326, "learning_rate": 3.1715201144319834e-06, "loss": 0.7998, "step": 17853 }, { "epoch": 0.6306354834009599, "grad_norm": 1.69057297706604, "learning_rate": 3.170987737149497e-06, "loss": 0.7594, "step": 17854 }, { "epoch": 0.6306708052046678, "grad_norm": 2.2895634174346924, "learning_rate": 3.170455383804779e-06, "loss": 0.7901, "step": 17855 }, { "epoch": 0.6307061270083757, "grad_norm": 1.6975772380828857, "learning_rate": 3.169923054404798e-06, "loss": 0.8104, "step": 17856 }, { "epoch": 0.6307414488120836, "grad_norm": 1.6614909172058105, "learning_rate": 3.169390748956518e-06, "loss": 0.7753, "step": 17857 }, { "epoch": 0.6307767706157915, "grad_norm": 1.6867985725402832, "learning_rate": 3.16885846746691e-06, "loss": 0.7791, "step": 17858 }, { "epoch": 0.6308120924194994, "grad_norm": 1.7488646507263184, "learning_rate": 3.1683262099429373e-06, "loss": 0.797, "step": 17859 }, { "epoch": 0.6308474142232073, "grad_norm": 1.6962155103683472, "learning_rate": 3.167793976391566e-06, "loss": 0.7978, "step": 17860 }, { "epoch": 0.6308827360269152, "grad_norm": 1.8552385568618774, "learning_rate": 3.1672617668197637e-06, "loss": 0.7693, "step": 17861 }, { "epoch": 0.6309180578306232, "grad_norm": 1.5910823345184326, "learning_rate": 3.1667295812344957e-06, "loss": 0.7879, "step": 17862 }, { "epoch": 0.6309533796343311, "grad_norm": 1.5060020685195923, "learning_rate": 3.1661974196427243e-06, "loss": 0.7685, "step": 17863 }, { "epoch": 0.630988701438039, "grad_norm": 1.7944365739822388, "learning_rate": 3.165665282051418e-06, "loss": 0.8366, "step": 17864 }, { "epoch": 0.6310240232417469, "grad_norm": 1.643290400505066, "learning_rate": 3.165133168467539e-06, "loss": 0.7673, "step": 17865 }, { "epoch": 0.6310593450454548, "grad_norm": 1.7761765718460083, "learning_rate": 3.1646010788980515e-06, "loss": 0.8414, "step": 17866 }, { "epoch": 0.6310946668491626, "grad_norm": 1.664861798286438, "learning_rate": 3.164069013349921e-06, "loss": 0.7922, "step": 17867 }, { "epoch": 0.6311299886528705, "grad_norm": 1.5369585752487183, "learning_rate": 3.163536971830111e-06, "loss": 0.8003, "step": 17868 }, { "epoch": 0.6311653104565784, "grad_norm": 1.7229691743850708, "learning_rate": 3.163004954345583e-06, "loss": 0.7352, "step": 17869 }, { "epoch": 0.6312006322602863, "grad_norm": 1.7034863233566284, "learning_rate": 3.1624729609033005e-06, "loss": 0.7851, "step": 17870 }, { "epoch": 0.6312359540639942, "grad_norm": 1.7071139812469482, "learning_rate": 3.161940991510227e-06, "loss": 0.7614, "step": 17871 }, { "epoch": 0.6312712758677022, "grad_norm": 1.752022624015808, "learning_rate": 3.1614090461733236e-06, "loss": 0.7805, "step": 17872 }, { "epoch": 0.6313065976714101, "grad_norm": 1.6533541679382324, "learning_rate": 3.160877124899553e-06, "loss": 0.7809, "step": 17873 }, { "epoch": 0.631341919475118, "grad_norm": 1.5778700113296509, "learning_rate": 3.160345227695878e-06, "loss": 0.7751, "step": 17874 }, { "epoch": 0.6313772412788259, "grad_norm": 1.702623724937439, "learning_rate": 3.1598133545692576e-06, "loss": 0.7727, "step": 17875 }, { "epoch": 0.6314125630825338, "grad_norm": 1.8126264810562134, "learning_rate": 3.1592815055266547e-06, "loss": 0.8049, "step": 17876 }, { "epoch": 0.6314478848862417, "grad_norm": 1.8055346012115479, "learning_rate": 3.158749680575029e-06, "loss": 0.7797, "step": 17877 }, { "epoch": 0.6314832066899496, "grad_norm": 1.7526016235351562, "learning_rate": 3.158217879721342e-06, "loss": 0.7513, "step": 17878 }, { "epoch": 0.6315185284936575, "grad_norm": 1.6642146110534668, "learning_rate": 3.157686102972552e-06, "loss": 0.7483, "step": 17879 }, { "epoch": 0.6315538502973654, "grad_norm": 1.5198235511779785, "learning_rate": 3.157154350335621e-06, "loss": 0.7546, "step": 17880 }, { "epoch": 0.6315891721010733, "grad_norm": 1.6142398118972778, "learning_rate": 3.156622621817507e-06, "loss": 0.7646, "step": 17881 }, { "epoch": 0.6316244939047813, "grad_norm": 2.527559280395508, "learning_rate": 3.1560909174251696e-06, "loss": 0.7664, "step": 17882 }, { "epoch": 0.6316598157084892, "grad_norm": 1.5631628036499023, "learning_rate": 3.1555592371655684e-06, "loss": 0.8026, "step": 17883 }, { "epoch": 0.6316951375121971, "grad_norm": 1.586092233657837, "learning_rate": 3.15502758104566e-06, "loss": 0.7871, "step": 17884 }, { "epoch": 0.631730459315905, "grad_norm": 1.7802294492721558, "learning_rate": 3.154495949072405e-06, "loss": 0.8136, "step": 17885 }, { "epoch": 0.6317657811196129, "grad_norm": 1.7068134546279907, "learning_rate": 3.15396434125276e-06, "loss": 0.8277, "step": 17886 }, { "epoch": 0.6318011029233208, "grad_norm": 1.5305736064910889, "learning_rate": 3.153432757593683e-06, "loss": 0.7929, "step": 17887 }, { "epoch": 0.6318364247270287, "grad_norm": 1.6001180410385132, "learning_rate": 3.152901198102131e-06, "loss": 0.7934, "step": 17888 }, { "epoch": 0.6318717465307366, "grad_norm": 1.672911524772644, "learning_rate": 3.1523696627850593e-06, "loss": 0.7718, "step": 17889 }, { "epoch": 0.6319070683344445, "grad_norm": 1.7573391199111938, "learning_rate": 3.1518381516494266e-06, "loss": 0.7994, "step": 17890 }, { "epoch": 0.6319423901381525, "grad_norm": 1.6603833436965942, "learning_rate": 3.151306664702189e-06, "loss": 0.7756, "step": 17891 }, { "epoch": 0.6319777119418604, "grad_norm": 1.8428571224212646, "learning_rate": 3.1507752019503025e-06, "loss": 0.77, "step": 17892 }, { "epoch": 0.6320130337455682, "grad_norm": 1.8365815877914429, "learning_rate": 3.1502437634007228e-06, "loss": 0.7905, "step": 17893 }, { "epoch": 0.6320483555492761, "grad_norm": 1.728641152381897, "learning_rate": 3.1497123490604035e-06, "loss": 0.7694, "step": 17894 }, { "epoch": 0.632083677352984, "grad_norm": 1.7666913270950317, "learning_rate": 3.149180958936303e-06, "loss": 0.7767, "step": 17895 }, { "epoch": 0.6321189991566919, "grad_norm": 1.667067050933838, "learning_rate": 3.1486495930353734e-06, "loss": 0.8008, "step": 17896 }, { "epoch": 0.6321543209603998, "grad_norm": 1.6166819334030151, "learning_rate": 3.148118251364569e-06, "loss": 0.8005, "step": 17897 }, { "epoch": 0.6321896427641077, "grad_norm": 1.92074716091156, "learning_rate": 3.147586933930846e-06, "loss": 0.7716, "step": 17898 }, { "epoch": 0.6322249645678156, "grad_norm": 2.259758949279785, "learning_rate": 3.147055640741158e-06, "loss": 0.7936, "step": 17899 }, { "epoch": 0.6322602863715235, "grad_norm": 1.803976058959961, "learning_rate": 3.146524371802455e-06, "loss": 0.7616, "step": 17900 }, { "epoch": 0.6322956081752314, "grad_norm": 1.7107044458389282, "learning_rate": 3.1459931271216943e-06, "loss": 0.7616, "step": 17901 }, { "epoch": 0.6323309299789394, "grad_norm": 1.7099366188049316, "learning_rate": 3.145461906705827e-06, "loss": 0.7344, "step": 17902 }, { "epoch": 0.6323662517826473, "grad_norm": 1.7162152528762817, "learning_rate": 3.1449307105618043e-06, "loss": 0.7981, "step": 17903 }, { "epoch": 0.6324015735863552, "grad_norm": 1.858109712600708, "learning_rate": 3.144399538696581e-06, "loss": 0.7393, "step": 17904 }, { "epoch": 0.6324368953900631, "grad_norm": 1.6966675519943237, "learning_rate": 3.1438683911171085e-06, "loss": 0.8035, "step": 17905 }, { "epoch": 0.632472217193771, "grad_norm": 1.9655048847198486, "learning_rate": 3.1433372678303377e-06, "loss": 0.7808, "step": 17906 }, { "epoch": 0.6325075389974789, "grad_norm": 1.7339273691177368, "learning_rate": 3.142806168843217e-06, "loss": 0.7789, "step": 17907 }, { "epoch": 0.6325428608011868, "grad_norm": 1.502143383026123, "learning_rate": 3.1422750941627024e-06, "loss": 0.7459, "step": 17908 }, { "epoch": 0.6325781826048947, "grad_norm": 1.5774016380310059, "learning_rate": 3.1417440437957414e-06, "loss": 0.7965, "step": 17909 }, { "epoch": 0.6326135044086026, "grad_norm": 1.59329092502594, "learning_rate": 3.141213017749284e-06, "loss": 0.7641, "step": 17910 }, { "epoch": 0.6326488262123106, "grad_norm": 1.4783885478973389, "learning_rate": 3.1406820160302825e-06, "loss": 0.7616, "step": 17911 }, { "epoch": 0.6326841480160185, "grad_norm": 1.7264171838760376, "learning_rate": 3.1401510386456857e-06, "loss": 0.787, "step": 17912 }, { "epoch": 0.6327194698197264, "grad_norm": 1.5880866050720215, "learning_rate": 3.1396200856024406e-06, "loss": 0.7713, "step": 17913 }, { "epoch": 0.6327547916234343, "grad_norm": 1.6589471101760864, "learning_rate": 3.1390891569075e-06, "loss": 0.7715, "step": 17914 }, { "epoch": 0.6327901134271422, "grad_norm": 1.701959490776062, "learning_rate": 3.1385582525678086e-06, "loss": 0.7869, "step": 17915 }, { "epoch": 0.6328254352308501, "grad_norm": 2.144498109817505, "learning_rate": 3.138027372590319e-06, "loss": 0.7879, "step": 17916 }, { "epoch": 0.632860757034558, "grad_norm": 0.9587790369987488, "learning_rate": 3.137496516981976e-06, "loss": 0.5793, "step": 17917 }, { "epoch": 0.6328960788382659, "grad_norm": 1.7219845056533813, "learning_rate": 3.136965685749728e-06, "loss": 0.8009, "step": 17918 }, { "epoch": 0.6329314006419737, "grad_norm": 1.7907567024230957, "learning_rate": 3.1364348789005246e-06, "loss": 0.7762, "step": 17919 }, { "epoch": 0.6329667224456816, "grad_norm": 2.9795854091644287, "learning_rate": 3.13590409644131e-06, "loss": 0.7322, "step": 17920 }, { "epoch": 0.6330020442493896, "grad_norm": 1.971013069152832, "learning_rate": 3.1353733383790317e-06, "loss": 0.7626, "step": 17921 }, { "epoch": 0.6330373660530975, "grad_norm": 1.6147280931472778, "learning_rate": 3.1348426047206383e-06, "loss": 0.7452, "step": 17922 }, { "epoch": 0.6330726878568054, "grad_norm": 1.5001388788223267, "learning_rate": 3.1343118954730733e-06, "loss": 0.7383, "step": 17923 }, { "epoch": 0.6331080096605133, "grad_norm": 1.611789345741272, "learning_rate": 3.1337812106432853e-06, "loss": 0.7655, "step": 17924 }, { "epoch": 0.6331433314642212, "grad_norm": 1.6587893962860107, "learning_rate": 3.1332505502382154e-06, "loss": 0.7487, "step": 17925 }, { "epoch": 0.6331786532679291, "grad_norm": 1.648770809173584, "learning_rate": 3.1327199142648123e-06, "loss": 0.7978, "step": 17926 }, { "epoch": 0.633213975071637, "grad_norm": 1.6801010370254517, "learning_rate": 3.1321893027300202e-06, "loss": 0.7452, "step": 17927 }, { "epoch": 0.6332492968753449, "grad_norm": 1.738621711730957, "learning_rate": 3.131658715640783e-06, "loss": 0.7709, "step": 17928 }, { "epoch": 0.6332846186790528, "grad_norm": 1.7349375486373901, "learning_rate": 3.1311281530040454e-06, "loss": 0.768, "step": 17929 }, { "epoch": 0.6333199404827607, "grad_norm": 1.839092493057251, "learning_rate": 3.1305976148267513e-06, "loss": 0.7892, "step": 17930 }, { "epoch": 0.6333552622864687, "grad_norm": 1.9345190525054932, "learning_rate": 3.1300671011158435e-06, "loss": 0.7993, "step": 17931 }, { "epoch": 0.6333905840901766, "grad_norm": 1.6665388345718384, "learning_rate": 3.129536611878267e-06, "loss": 0.792, "step": 17932 }, { "epoch": 0.6334259058938845, "grad_norm": 1.7071962356567383, "learning_rate": 3.129006147120964e-06, "loss": 0.7804, "step": 17933 }, { "epoch": 0.6334612276975924, "grad_norm": 1.6930478811264038, "learning_rate": 3.1284757068508755e-06, "loss": 0.7703, "step": 17934 }, { "epoch": 0.6334965495013003, "grad_norm": 1.682611107826233, "learning_rate": 3.1279452910749457e-06, "loss": 0.7769, "step": 17935 }, { "epoch": 0.6335318713050082, "grad_norm": 2.1250548362731934, "learning_rate": 3.1274148998001165e-06, "loss": 0.7902, "step": 17936 }, { "epoch": 0.6335671931087161, "grad_norm": 2.10931134223938, "learning_rate": 3.126884533033329e-06, "loss": 0.7731, "step": 17937 }, { "epoch": 0.633602514912424, "grad_norm": 1.5773290395736694, "learning_rate": 3.126354190781524e-06, "loss": 0.7843, "step": 17938 }, { "epoch": 0.6336378367161319, "grad_norm": 1.7440516948699951, "learning_rate": 3.1258238730516426e-06, "loss": 0.7682, "step": 17939 }, { "epoch": 0.6336731585198399, "grad_norm": 1.6318590641021729, "learning_rate": 3.1252935798506274e-06, "loss": 0.7504, "step": 17940 }, { "epoch": 0.6337084803235478, "grad_norm": 1.8619849681854248, "learning_rate": 3.124763311185417e-06, "loss": 0.8097, "step": 17941 }, { "epoch": 0.6337438021272557, "grad_norm": 3.1753957271575928, "learning_rate": 3.1242330670629535e-06, "loss": 0.7467, "step": 17942 }, { "epoch": 0.6337791239309636, "grad_norm": 1.6385396718978882, "learning_rate": 3.123702847490174e-06, "loss": 0.7745, "step": 17943 }, { "epoch": 0.6338144457346715, "grad_norm": 1.7245100736618042, "learning_rate": 3.123172652474017e-06, "loss": 0.7456, "step": 17944 }, { "epoch": 0.6338497675383793, "grad_norm": 1.709263801574707, "learning_rate": 3.122642482021425e-06, "loss": 0.7837, "step": 17945 }, { "epoch": 0.6338850893420872, "grad_norm": 1.461063027381897, "learning_rate": 3.1221123361393356e-06, "loss": 0.7104, "step": 17946 }, { "epoch": 0.6339204111457951, "grad_norm": 1.7374504804611206, "learning_rate": 3.1215822148346857e-06, "loss": 0.8276, "step": 17947 }, { "epoch": 0.633955732949503, "grad_norm": 1.4257584810256958, "learning_rate": 3.121052118114416e-06, "loss": 0.7782, "step": 17948 }, { "epoch": 0.6339910547532109, "grad_norm": 1.5994844436645508, "learning_rate": 3.1205220459854613e-06, "loss": 0.7787, "step": 17949 }, { "epoch": 0.6340263765569188, "grad_norm": 1.8990455865859985, "learning_rate": 3.119991998454762e-06, "loss": 0.786, "step": 17950 }, { "epoch": 0.6340616983606268, "grad_norm": 1.6588325500488281, "learning_rate": 3.1194619755292543e-06, "loss": 0.7595, "step": 17951 }, { "epoch": 0.6340970201643347, "grad_norm": 1.5797700881958008, "learning_rate": 3.1189319772158737e-06, "loss": 0.7542, "step": 17952 }, { "epoch": 0.6341323419680426, "grad_norm": 1.8838627338409424, "learning_rate": 3.118402003521559e-06, "loss": 0.7834, "step": 17953 }, { "epoch": 0.6341676637717505, "grad_norm": 1.773620367050171, "learning_rate": 3.1178720544532447e-06, "loss": 0.7966, "step": 17954 }, { "epoch": 0.6342029855754584, "grad_norm": 1.6478039026260376, "learning_rate": 3.1173421300178664e-06, "loss": 0.779, "step": 17955 }, { "epoch": 0.6342383073791663, "grad_norm": 1.716981291770935, "learning_rate": 3.1168122302223615e-06, "loss": 0.7789, "step": 17956 }, { "epoch": 0.6342736291828742, "grad_norm": 1.6674857139587402, "learning_rate": 3.1162823550736637e-06, "loss": 0.7825, "step": 17957 }, { "epoch": 0.6343089509865821, "grad_norm": 1.769454002380371, "learning_rate": 3.1157525045787073e-06, "loss": 0.7867, "step": 17958 }, { "epoch": 0.63434427279029, "grad_norm": 1.8558201789855957, "learning_rate": 3.1152226787444295e-06, "loss": 0.7811, "step": 17959 }, { "epoch": 0.634379594593998, "grad_norm": 1.8378227949142456, "learning_rate": 3.1146928775777628e-06, "loss": 0.7718, "step": 17960 }, { "epoch": 0.6344149163977059, "grad_norm": 2.0402276515960693, "learning_rate": 3.1141631010856423e-06, "loss": 0.7689, "step": 17961 }, { "epoch": 0.6344502382014138, "grad_norm": 1.7545969486236572, "learning_rate": 3.113633349274998e-06, "loss": 0.8106, "step": 17962 }, { "epoch": 0.6344855600051217, "grad_norm": 1.5801066160202026, "learning_rate": 3.1131036221527676e-06, "loss": 0.7792, "step": 17963 }, { "epoch": 0.6345208818088296, "grad_norm": 1.6441456079483032, "learning_rate": 3.112573919725882e-06, "loss": 0.782, "step": 17964 }, { "epoch": 0.6345562036125375, "grad_norm": 1.6293127536773682, "learning_rate": 3.1120442420012737e-06, "loss": 0.7627, "step": 17965 }, { "epoch": 0.6345915254162454, "grad_norm": 1.6828351020812988, "learning_rate": 3.1115145889858754e-06, "loss": 0.8137, "step": 17966 }, { "epoch": 0.6346268472199533, "grad_norm": 1.6843281984329224, "learning_rate": 3.1109849606866206e-06, "loss": 0.7685, "step": 17967 }, { "epoch": 0.6346621690236612, "grad_norm": 1.7292546033859253, "learning_rate": 3.1104553571104373e-06, "loss": 0.777, "step": 17968 }, { "epoch": 0.6346974908273691, "grad_norm": 1.9062678813934326, "learning_rate": 3.1099257782642605e-06, "loss": 0.7685, "step": 17969 }, { "epoch": 0.6347328126310771, "grad_norm": 1.6503955125808716, "learning_rate": 3.10939622415502e-06, "loss": 0.7661, "step": 17970 }, { "epoch": 0.6347681344347849, "grad_norm": 1.6646242141723633, "learning_rate": 3.1088666947896453e-06, "loss": 0.8199, "step": 17971 }, { "epoch": 0.6348034562384928, "grad_norm": 0.9133583307266235, "learning_rate": 3.108337190175068e-06, "loss": 0.5784, "step": 17972 }, { "epoch": 0.6348387780422007, "grad_norm": 1.7707173824310303, "learning_rate": 3.107807710318218e-06, "loss": 0.8102, "step": 17973 }, { "epoch": 0.6348740998459086, "grad_norm": 1.8642864227294922, "learning_rate": 3.107278255226025e-06, "loss": 0.8045, "step": 17974 }, { "epoch": 0.6349094216496165, "grad_norm": 1.6817240715026855, "learning_rate": 3.106748824905419e-06, "loss": 0.7869, "step": 17975 }, { "epoch": 0.6349447434533244, "grad_norm": 1.5990653038024902, "learning_rate": 3.106219419363328e-06, "loss": 0.7755, "step": 17976 }, { "epoch": 0.6349800652570323, "grad_norm": 1.639070749282837, "learning_rate": 3.1056900386066813e-06, "loss": 0.7494, "step": 17977 }, { "epoch": 0.6350153870607402, "grad_norm": 1.8903002738952637, "learning_rate": 3.105160682642408e-06, "loss": 0.782, "step": 17978 }, { "epoch": 0.6350507088644481, "grad_norm": 1.5484788417816162, "learning_rate": 3.1046313514774363e-06, "loss": 0.7408, "step": 17979 }, { "epoch": 0.635086030668156, "grad_norm": 1.884169578552246, "learning_rate": 3.104102045118692e-06, "loss": 0.7965, "step": 17980 }, { "epoch": 0.635121352471864, "grad_norm": 1.5747500658035278, "learning_rate": 3.1035727635731027e-06, "loss": 0.7663, "step": 17981 }, { "epoch": 0.6351566742755719, "grad_norm": 1.6491894721984863, "learning_rate": 3.1030435068475977e-06, "loss": 0.7889, "step": 17982 }, { "epoch": 0.6351919960792798, "grad_norm": 1.6869646310806274, "learning_rate": 3.1025142749491015e-06, "loss": 0.7599, "step": 17983 }, { "epoch": 0.6352273178829877, "grad_norm": 1.615326166152954, "learning_rate": 3.1019850678845432e-06, "loss": 0.754, "step": 17984 }, { "epoch": 0.6352626396866956, "grad_norm": 0.9659327268600464, "learning_rate": 3.1014558856608473e-06, "loss": 0.5694, "step": 17985 }, { "epoch": 0.6352979614904035, "grad_norm": 1.6719919443130493, "learning_rate": 3.1009267282849387e-06, "loss": 0.7673, "step": 17986 }, { "epoch": 0.6353332832941114, "grad_norm": 1.6171619892120361, "learning_rate": 3.1003975957637457e-06, "loss": 0.7837, "step": 17987 }, { "epoch": 0.6353686050978193, "grad_norm": 1.7667756080627441, "learning_rate": 3.099868488104191e-06, "loss": 0.7759, "step": 17988 }, { "epoch": 0.6354039269015272, "grad_norm": 1.6409821510314941, "learning_rate": 3.0993394053131996e-06, "loss": 0.7818, "step": 17989 }, { "epoch": 0.6354392487052352, "grad_norm": 1.4920830726623535, "learning_rate": 3.0988103473976983e-06, "loss": 0.7254, "step": 17990 }, { "epoch": 0.6354745705089431, "grad_norm": 1.6810139417648315, "learning_rate": 3.0982813143646094e-06, "loss": 0.7741, "step": 17991 }, { "epoch": 0.635509892312651, "grad_norm": 2.2820839881896973, "learning_rate": 3.0977523062208564e-06, "loss": 0.7694, "step": 17992 }, { "epoch": 0.6355452141163589, "grad_norm": 1.8081587553024292, "learning_rate": 3.0972233229733644e-06, "loss": 0.7615, "step": 17993 }, { "epoch": 0.6355805359200668, "grad_norm": 2.533582925796509, "learning_rate": 3.096694364629056e-06, "loss": 0.7661, "step": 17994 }, { "epoch": 0.6356158577237747, "grad_norm": 1.6177129745483398, "learning_rate": 3.096165431194853e-06, "loss": 0.7951, "step": 17995 }, { "epoch": 0.6356511795274826, "grad_norm": 1.634513258934021, "learning_rate": 3.0956365226776795e-06, "loss": 0.7298, "step": 17996 }, { "epoch": 0.6356865013311904, "grad_norm": 2.6551570892333984, "learning_rate": 3.095107639084459e-06, "loss": 0.7645, "step": 17997 }, { "epoch": 0.6357218231348983, "grad_norm": 1.7951743602752686, "learning_rate": 3.0945787804221105e-06, "loss": 0.794, "step": 17998 }, { "epoch": 0.6357571449386062, "grad_norm": 1.9408996105194092, "learning_rate": 3.094049946697555e-06, "loss": 0.7837, "step": 17999 }, { "epoch": 0.6357924667423142, "grad_norm": 1.6412758827209473, "learning_rate": 3.093521137917717e-06, "loss": 0.7866, "step": 18000 }, { "epoch": 0.6358277885460221, "grad_norm": 1.760110855102539, "learning_rate": 3.0929923540895164e-06, "loss": 0.7716, "step": 18001 }, { "epoch": 0.63586311034973, "grad_norm": 1.6962817907333374, "learning_rate": 3.0924635952198718e-06, "loss": 0.7866, "step": 18002 }, { "epoch": 0.6358984321534379, "grad_norm": 1.5840256214141846, "learning_rate": 3.0919348613157063e-06, "loss": 0.773, "step": 18003 }, { "epoch": 0.6359337539571458, "grad_norm": 1.5429588556289673, "learning_rate": 3.0914061523839385e-06, "loss": 0.7512, "step": 18004 }, { "epoch": 0.6359690757608537, "grad_norm": 1.82823646068573, "learning_rate": 3.0908774684314874e-06, "loss": 0.7624, "step": 18005 }, { "epoch": 0.6360043975645616, "grad_norm": 1.9204543828964233, "learning_rate": 3.090348809465274e-06, "loss": 0.8307, "step": 18006 }, { "epoch": 0.6360397193682695, "grad_norm": 1.72218918800354, "learning_rate": 3.089820175492215e-06, "loss": 0.7727, "step": 18007 }, { "epoch": 0.6360750411719774, "grad_norm": 1.6750410795211792, "learning_rate": 3.0892915665192324e-06, "loss": 0.7846, "step": 18008 }, { "epoch": 0.6361103629756854, "grad_norm": 1.749792456626892, "learning_rate": 3.088762982553242e-06, "loss": 0.753, "step": 18009 }, { "epoch": 0.6361456847793933, "grad_norm": 1.8706637620925903, "learning_rate": 3.0882344236011613e-06, "loss": 0.7968, "step": 18010 }, { "epoch": 0.6361810065831012, "grad_norm": 1.6780675649642944, "learning_rate": 3.08770588966991e-06, "loss": 0.7631, "step": 18011 }, { "epoch": 0.6362163283868091, "grad_norm": 2.1500771045684814, "learning_rate": 3.087177380766405e-06, "loss": 0.7976, "step": 18012 }, { "epoch": 0.636251650190517, "grad_norm": 1.6450159549713135, "learning_rate": 3.0866488968975616e-06, "loss": 0.7911, "step": 18013 }, { "epoch": 0.6362869719942249, "grad_norm": 2.0911309719085693, "learning_rate": 3.0861204380702993e-06, "loss": 0.7764, "step": 18014 }, { "epoch": 0.6363222937979328, "grad_norm": 1.6374880075454712, "learning_rate": 3.085592004291532e-06, "loss": 0.7953, "step": 18015 }, { "epoch": 0.6363576156016407, "grad_norm": 1.8904176950454712, "learning_rate": 3.0850635955681784e-06, "loss": 0.7657, "step": 18016 }, { "epoch": 0.6363929374053486, "grad_norm": 1.7224222421646118, "learning_rate": 3.0845352119071505e-06, "loss": 0.7552, "step": 18017 }, { "epoch": 0.6364282592090565, "grad_norm": 1.7188754081726074, "learning_rate": 3.0840068533153667e-06, "loss": 0.7798, "step": 18018 }, { "epoch": 0.6364635810127645, "grad_norm": 1.668984055519104, "learning_rate": 3.083478519799741e-06, "loss": 0.782, "step": 18019 }, { "epoch": 0.6364989028164724, "grad_norm": 1.9299342632293701, "learning_rate": 3.0829502113671865e-06, "loss": 0.7747, "step": 18020 }, { "epoch": 0.6365342246201803, "grad_norm": 1.5968531370162964, "learning_rate": 3.0824219280246205e-06, "loss": 0.7837, "step": 18021 }, { "epoch": 0.6365695464238882, "grad_norm": 1.5610109567642212, "learning_rate": 3.0818936697789564e-06, "loss": 0.7524, "step": 18022 }, { "epoch": 0.636604868227596, "grad_norm": 1.7872203588485718, "learning_rate": 3.081365436637106e-06, "loss": 0.7664, "step": 18023 }, { "epoch": 0.6366401900313039, "grad_norm": 1.663804531097412, "learning_rate": 3.0808372286059853e-06, "loss": 0.7787, "step": 18024 }, { "epoch": 0.6366755118350118, "grad_norm": 1.5903328657150269, "learning_rate": 3.0803090456925054e-06, "loss": 0.7933, "step": 18025 }, { "epoch": 0.6367108336387197, "grad_norm": 1.7368128299713135, "learning_rate": 3.07978088790358e-06, "loss": 0.7718, "step": 18026 }, { "epoch": 0.6367461554424276, "grad_norm": 1.08327054977417, "learning_rate": 3.0792527552461217e-06, "loss": 0.6098, "step": 18027 }, { "epoch": 0.6367814772461355, "grad_norm": 1.5689805746078491, "learning_rate": 3.0787246477270427e-06, "loss": 0.7845, "step": 18028 }, { "epoch": 0.6368167990498435, "grad_norm": 1.6852680444717407, "learning_rate": 3.078196565353253e-06, "loss": 0.728, "step": 18029 }, { "epoch": 0.6368521208535514, "grad_norm": 1.6914989948272705, "learning_rate": 3.0776685081316665e-06, "loss": 0.7983, "step": 18030 }, { "epoch": 0.6368874426572593, "grad_norm": 1.7729653120040894, "learning_rate": 3.0771404760691924e-06, "loss": 0.7787, "step": 18031 }, { "epoch": 0.6369227644609672, "grad_norm": 1.740726351737976, "learning_rate": 3.0766124691727428e-06, "loss": 0.7982, "step": 18032 }, { "epoch": 0.6369580862646751, "grad_norm": 1.6889230012893677, "learning_rate": 3.076084487449228e-06, "loss": 0.8077, "step": 18033 }, { "epoch": 0.636993408068383, "grad_norm": 1.6063272953033447, "learning_rate": 3.075556530905559e-06, "loss": 0.7959, "step": 18034 }, { "epoch": 0.6370287298720909, "grad_norm": 1.757759928703308, "learning_rate": 3.075028599548643e-06, "loss": 0.8314, "step": 18035 }, { "epoch": 0.6370640516757988, "grad_norm": 6.368432998657227, "learning_rate": 3.07450069338539e-06, "loss": 0.7605, "step": 18036 }, { "epoch": 0.6370993734795067, "grad_norm": 1.6303515434265137, "learning_rate": 3.0739728124227107e-06, "loss": 0.7745, "step": 18037 }, { "epoch": 0.6371346952832146, "grad_norm": 1.4598262310028076, "learning_rate": 3.0734449566675133e-06, "loss": 0.7825, "step": 18038 }, { "epoch": 0.6371700170869226, "grad_norm": 1.740077257156372, "learning_rate": 3.0729171261267055e-06, "loss": 0.7876, "step": 18039 }, { "epoch": 0.6372053388906305, "grad_norm": 1.7074007987976074, "learning_rate": 3.0723893208071964e-06, "loss": 0.7815, "step": 18040 }, { "epoch": 0.6372406606943384, "grad_norm": 1.8573534488677979, "learning_rate": 3.071861540715893e-06, "loss": 0.822, "step": 18041 }, { "epoch": 0.6372759824980463, "grad_norm": 1.724900484085083, "learning_rate": 3.0713337858597043e-06, "loss": 0.8011, "step": 18042 }, { "epoch": 0.6373113043017542, "grad_norm": 1.7448660135269165, "learning_rate": 3.0708060562455365e-06, "loss": 0.8334, "step": 18043 }, { "epoch": 0.6373466261054621, "grad_norm": 1.0304934978485107, "learning_rate": 3.0702783518802955e-06, "loss": 0.5827, "step": 18044 }, { "epoch": 0.63738194790917, "grad_norm": 1.6594429016113281, "learning_rate": 3.0697506727708894e-06, "loss": 0.8273, "step": 18045 }, { "epoch": 0.6374172697128779, "grad_norm": 1.8472464084625244, "learning_rate": 3.0692230189242235e-06, "loss": 0.7718, "step": 18046 }, { "epoch": 0.6374525915165858, "grad_norm": 1.5451366901397705, "learning_rate": 3.0686953903472026e-06, "loss": 0.7663, "step": 18047 }, { "epoch": 0.6374879133202938, "grad_norm": 1.6451866626739502, "learning_rate": 3.0681677870467353e-06, "loss": 0.7563, "step": 18048 }, { "epoch": 0.6375232351240016, "grad_norm": 1.8528462648391724, "learning_rate": 3.0676402090297242e-06, "loss": 0.7641, "step": 18049 }, { "epoch": 0.6375585569277095, "grad_norm": 1.61583411693573, "learning_rate": 3.067112656303074e-06, "loss": 0.7945, "step": 18050 }, { "epoch": 0.6375938787314174, "grad_norm": 1.8219220638275146, "learning_rate": 3.066585128873691e-06, "loss": 0.7571, "step": 18051 }, { "epoch": 0.6376292005351253, "grad_norm": 1.8696767091751099, "learning_rate": 3.066057626748478e-06, "loss": 0.7612, "step": 18052 }, { "epoch": 0.6376645223388332, "grad_norm": 1.7077832221984863, "learning_rate": 3.065530149934341e-06, "loss": 0.7939, "step": 18053 }, { "epoch": 0.6376998441425411, "grad_norm": 1.784576416015625, "learning_rate": 3.065002698438179e-06, "loss": 0.7942, "step": 18054 }, { "epoch": 0.637735165946249, "grad_norm": 1.6520779132843018, "learning_rate": 3.064475272266899e-06, "loss": 0.7888, "step": 18055 }, { "epoch": 0.6377704877499569, "grad_norm": 1.7670384645462036, "learning_rate": 3.063947871427404e-06, "loss": 0.7775, "step": 18056 }, { "epoch": 0.6378058095536648, "grad_norm": 1.782774806022644, "learning_rate": 3.0634204959265936e-06, "loss": 0.8112, "step": 18057 }, { "epoch": 0.6378411313573727, "grad_norm": 1.6453558206558228, "learning_rate": 3.0628931457713717e-06, "loss": 0.79, "step": 18058 }, { "epoch": 0.6378764531610807, "grad_norm": 1.5196410417556763, "learning_rate": 3.0623658209686413e-06, "loss": 0.7783, "step": 18059 }, { "epoch": 0.6379117749647886, "grad_norm": 1.7816084623336792, "learning_rate": 3.061838521525301e-06, "loss": 0.8169, "step": 18060 }, { "epoch": 0.6379470967684965, "grad_norm": 1.7606563568115234, "learning_rate": 3.0613112474482554e-06, "loss": 0.7522, "step": 18061 }, { "epoch": 0.6379824185722044, "grad_norm": 2.015209436416626, "learning_rate": 3.0607839987444045e-06, "loss": 0.7741, "step": 18062 }, { "epoch": 0.6380177403759123, "grad_norm": 1.5027462244033813, "learning_rate": 3.0602567754206456e-06, "loss": 0.7705, "step": 18063 }, { "epoch": 0.6380530621796202, "grad_norm": 1.498355507850647, "learning_rate": 3.059729577483883e-06, "loss": 0.7401, "step": 18064 }, { "epoch": 0.6380883839833281, "grad_norm": 1.7720868587493896, "learning_rate": 3.059202404941014e-06, "loss": 0.7658, "step": 18065 }, { "epoch": 0.638123705787036, "grad_norm": 1.7342555522918701, "learning_rate": 3.0586752577989397e-06, "loss": 0.7548, "step": 18066 }, { "epoch": 0.6381590275907439, "grad_norm": 1.6803429126739502, "learning_rate": 3.0581481360645593e-06, "loss": 0.7606, "step": 18067 }, { "epoch": 0.6381943493944519, "grad_norm": 1.5551018714904785, "learning_rate": 3.05762103974477e-06, "loss": 0.7439, "step": 18068 }, { "epoch": 0.6382296711981598, "grad_norm": 1.6445382833480835, "learning_rate": 3.0570939688464723e-06, "loss": 0.7629, "step": 18069 }, { "epoch": 0.6382649930018677, "grad_norm": 1.6367225646972656, "learning_rate": 3.056566923376564e-06, "loss": 0.774, "step": 18070 }, { "epoch": 0.6383003148055756, "grad_norm": 2.034248113632202, "learning_rate": 3.0560399033419426e-06, "loss": 0.824, "step": 18071 }, { "epoch": 0.6383356366092835, "grad_norm": 1.5566262006759644, "learning_rate": 3.055512908749505e-06, "loss": 0.7591, "step": 18072 }, { "epoch": 0.6383709584129914, "grad_norm": 1.7684786319732666, "learning_rate": 3.0549859396061477e-06, "loss": 0.8231, "step": 18073 }, { "epoch": 0.6384062802166993, "grad_norm": 1.7132360935211182, "learning_rate": 3.05445899591877e-06, "loss": 0.782, "step": 18074 }, { "epoch": 0.6384416020204071, "grad_norm": 1.738585114479065, "learning_rate": 3.053932077694266e-06, "loss": 0.7526, "step": 18075 }, { "epoch": 0.638476923824115, "grad_norm": 1.6831623315811157, "learning_rate": 3.053405184939534e-06, "loss": 0.7606, "step": 18076 }, { "epoch": 0.6385122456278229, "grad_norm": 1.634519338607788, "learning_rate": 3.0528783176614697e-06, "loss": 0.8023, "step": 18077 }, { "epoch": 0.6385475674315308, "grad_norm": 1.660046935081482, "learning_rate": 3.052351475866966e-06, "loss": 0.8038, "step": 18078 }, { "epoch": 0.6385828892352388, "grad_norm": 1.7378424406051636, "learning_rate": 3.051824659562922e-06, "loss": 0.7913, "step": 18079 }, { "epoch": 0.6386182110389467, "grad_norm": 2.2154784202575684, "learning_rate": 3.05129786875623e-06, "loss": 0.7428, "step": 18080 }, { "epoch": 0.6386535328426546, "grad_norm": 1.4956351518630981, "learning_rate": 3.0507711034537847e-06, "loss": 0.7361, "step": 18081 }, { "epoch": 0.6386888546463625, "grad_norm": 1.6533043384552002, "learning_rate": 3.050244363662481e-06, "loss": 0.7553, "step": 18082 }, { "epoch": 0.6387241764500704, "grad_norm": 1.6341006755828857, "learning_rate": 3.0497176493892135e-06, "loss": 0.7894, "step": 18083 }, { "epoch": 0.6387594982537783, "grad_norm": 1.4572538137435913, "learning_rate": 3.0491909606408734e-06, "loss": 0.7679, "step": 18084 }, { "epoch": 0.6387948200574862, "grad_norm": 1.6968510150909424, "learning_rate": 3.0486642974243564e-06, "loss": 0.7488, "step": 18085 }, { "epoch": 0.6388301418611941, "grad_norm": 1.8798463344573975, "learning_rate": 3.048137659746554e-06, "loss": 0.7966, "step": 18086 }, { "epoch": 0.638865463664902, "grad_norm": 1.8632845878601074, "learning_rate": 3.0476110476143583e-06, "loss": 0.8125, "step": 18087 }, { "epoch": 0.63890078546861, "grad_norm": 1.6802436113357544, "learning_rate": 3.047084461034663e-06, "loss": 0.7618, "step": 18088 }, { "epoch": 0.6389361072723179, "grad_norm": 1.8836522102355957, "learning_rate": 3.0465579000143576e-06, "loss": 0.8007, "step": 18089 }, { "epoch": 0.6389714290760258, "grad_norm": 1.8525317907333374, "learning_rate": 3.046031364560339e-06, "loss": 0.7962, "step": 18090 }, { "epoch": 0.6390067508797337, "grad_norm": 1.525844931602478, "learning_rate": 3.045504854679492e-06, "loss": 0.7628, "step": 18091 }, { "epoch": 0.6390420726834416, "grad_norm": 1.5680288076400757, "learning_rate": 3.04497837037871e-06, "loss": 0.7425, "step": 18092 }, { "epoch": 0.6390773944871495, "grad_norm": 1.5829334259033203, "learning_rate": 3.044451911664884e-06, "loss": 0.756, "step": 18093 }, { "epoch": 0.6391127162908574, "grad_norm": 1.633508324623108, "learning_rate": 3.0439254785449023e-06, "loss": 0.7411, "step": 18094 }, { "epoch": 0.6391480380945653, "grad_norm": 1.7065609693527222, "learning_rate": 3.043399071025658e-06, "loss": 0.7834, "step": 18095 }, { "epoch": 0.6391833598982732, "grad_norm": 1.8596192598342896, "learning_rate": 3.0428726891140386e-06, "loss": 0.7729, "step": 18096 }, { "epoch": 0.6392186817019812, "grad_norm": 1.7716506719589233, "learning_rate": 3.042346332816933e-06, "loss": 0.7954, "step": 18097 }, { "epoch": 0.6392540035056891, "grad_norm": 1.6239583492279053, "learning_rate": 3.041820002141231e-06, "loss": 0.7999, "step": 18098 }, { "epoch": 0.639289325309397, "grad_norm": 1.6858524084091187, "learning_rate": 3.0412936970938202e-06, "loss": 0.7586, "step": 18099 }, { "epoch": 0.6393246471131049, "grad_norm": 1.5646787881851196, "learning_rate": 3.0407674176815892e-06, "loss": 0.7473, "step": 18100 }, { "epoch": 0.6393599689168127, "grad_norm": 2.321458339691162, "learning_rate": 3.040241163911427e-06, "loss": 0.807, "step": 18101 }, { "epoch": 0.6393952907205206, "grad_norm": 1.6933060884475708, "learning_rate": 3.039714935790219e-06, "loss": 0.7825, "step": 18102 }, { "epoch": 0.6394306125242285, "grad_norm": 1.6543753147125244, "learning_rate": 3.039188733324854e-06, "loss": 0.782, "step": 18103 }, { "epoch": 0.6394659343279364, "grad_norm": 1.5184701681137085, "learning_rate": 3.038662556522218e-06, "loss": 0.7784, "step": 18104 }, { "epoch": 0.6395012561316443, "grad_norm": 1.5967859029769897, "learning_rate": 3.0381364053891972e-06, "loss": 0.768, "step": 18105 }, { "epoch": 0.6395365779353522, "grad_norm": 1.5749003887176514, "learning_rate": 3.0376102799326795e-06, "loss": 0.7694, "step": 18106 }, { "epoch": 0.6395718997390601, "grad_norm": 1.5759612321853638, "learning_rate": 3.0370841801595496e-06, "loss": 0.7545, "step": 18107 }, { "epoch": 0.6396072215427681, "grad_norm": 2.433342218399048, "learning_rate": 3.0365581060766935e-06, "loss": 0.7809, "step": 18108 }, { "epoch": 0.639642543346476, "grad_norm": 1.6615986824035645, "learning_rate": 3.036032057690994e-06, "loss": 0.7525, "step": 18109 }, { "epoch": 0.6396778651501839, "grad_norm": 1.72797691822052, "learning_rate": 3.035506035009339e-06, "loss": 0.8251, "step": 18110 }, { "epoch": 0.6397131869538918, "grad_norm": 1.7594194412231445, "learning_rate": 3.034980038038612e-06, "loss": 0.7992, "step": 18111 }, { "epoch": 0.6397485087575997, "grad_norm": 1.5377271175384521, "learning_rate": 3.0344540667856946e-06, "loss": 0.7545, "step": 18112 }, { "epoch": 0.6397838305613076, "grad_norm": 1.5808382034301758, "learning_rate": 3.0339281212574757e-06, "loss": 0.7613, "step": 18113 }, { "epoch": 0.6398191523650155, "grad_norm": 1.749651551246643, "learning_rate": 3.0334022014608345e-06, "loss": 0.7813, "step": 18114 }, { "epoch": 0.6398544741687234, "grad_norm": 1.7094918489456177, "learning_rate": 3.0328763074026553e-06, "loss": 0.823, "step": 18115 }, { "epoch": 0.6398897959724313, "grad_norm": 1.7056711912155151, "learning_rate": 3.0323504390898218e-06, "loss": 0.7656, "step": 18116 }, { "epoch": 0.6399251177761393, "grad_norm": 1.9453595876693726, "learning_rate": 3.031824596529216e-06, "loss": 0.7903, "step": 18117 }, { "epoch": 0.6399604395798472, "grad_norm": 1.645883321762085, "learning_rate": 3.0312987797277195e-06, "loss": 0.7637, "step": 18118 }, { "epoch": 0.6399957613835551, "grad_norm": 1.507952332496643, "learning_rate": 3.0307729886922145e-06, "loss": 0.7454, "step": 18119 }, { "epoch": 0.640031083187263, "grad_norm": 1.7781357765197754, "learning_rate": 3.030247223429583e-06, "loss": 0.7906, "step": 18120 }, { "epoch": 0.6400664049909709, "grad_norm": 1.8753975629806519, "learning_rate": 3.0297214839467047e-06, "loss": 0.7625, "step": 18121 }, { "epoch": 0.6401017267946788, "grad_norm": 1.9068350791931152, "learning_rate": 3.029195770250462e-06, "loss": 0.7663, "step": 18122 }, { "epoch": 0.6401370485983867, "grad_norm": 1.5393619537353516, "learning_rate": 3.0286700823477333e-06, "loss": 0.7536, "step": 18123 }, { "epoch": 0.6401723704020946, "grad_norm": 1.7168117761611938, "learning_rate": 3.028144420245401e-06, "loss": 0.7863, "step": 18124 }, { "epoch": 0.6402076922058025, "grad_norm": 1.7464488744735718, "learning_rate": 3.027618783950345e-06, "loss": 0.7753, "step": 18125 }, { "epoch": 0.6402430140095104, "grad_norm": 0.9328460097312927, "learning_rate": 3.027093173469442e-06, "loss": 0.5862, "step": 18126 }, { "epoch": 0.6402783358132182, "grad_norm": 1.6575665473937988, "learning_rate": 3.0265675888095747e-06, "loss": 0.772, "step": 18127 }, { "epoch": 0.6403136576169262, "grad_norm": 1.6117925643920898, "learning_rate": 3.026042029977618e-06, "loss": 0.7759, "step": 18128 }, { "epoch": 0.6403489794206341, "grad_norm": 1.86098313331604, "learning_rate": 3.0255164969804527e-06, "loss": 0.7592, "step": 18129 }, { "epoch": 0.640384301224342, "grad_norm": 1.9014077186584473, "learning_rate": 3.0249909898249564e-06, "loss": 0.7669, "step": 18130 }, { "epoch": 0.6404196230280499, "grad_norm": 1.6612088680267334, "learning_rate": 3.024465508518006e-06, "loss": 0.7998, "step": 18131 }, { "epoch": 0.6404549448317578, "grad_norm": 1.7568621635437012, "learning_rate": 3.0239400530664797e-06, "loss": 0.7932, "step": 18132 }, { "epoch": 0.6404902666354657, "grad_norm": 1.6004178524017334, "learning_rate": 3.0234146234772543e-06, "loss": 0.7645, "step": 18133 }, { "epoch": 0.6405255884391736, "grad_norm": 2.2486326694488525, "learning_rate": 3.022889219757208e-06, "loss": 0.7771, "step": 18134 }, { "epoch": 0.6405609102428815, "grad_norm": 1.9445101022720337, "learning_rate": 3.0223638419132155e-06, "loss": 0.794, "step": 18135 }, { "epoch": 0.6405962320465894, "grad_norm": 1.6360739469528198, "learning_rate": 3.0218384899521525e-06, "loss": 0.746, "step": 18136 }, { "epoch": 0.6406315538502974, "grad_norm": 1.6955798864364624, "learning_rate": 3.0213131638808967e-06, "loss": 0.7774, "step": 18137 }, { "epoch": 0.6406668756540053, "grad_norm": 1.8846254348754883, "learning_rate": 3.0207878637063214e-06, "loss": 0.7579, "step": 18138 }, { "epoch": 0.6407021974577132, "grad_norm": 1.872081995010376, "learning_rate": 3.0202625894353023e-06, "loss": 0.8182, "step": 18139 }, { "epoch": 0.6407375192614211, "grad_norm": 1.621635913848877, "learning_rate": 3.019737341074715e-06, "loss": 0.7951, "step": 18140 }, { "epoch": 0.640772841065129, "grad_norm": 1.8802293539047241, "learning_rate": 3.0192121186314334e-06, "loss": 0.7985, "step": 18141 }, { "epoch": 0.6408081628688369, "grad_norm": 1.5743614435195923, "learning_rate": 3.0186869221123295e-06, "loss": 0.7909, "step": 18142 }, { "epoch": 0.6408434846725448, "grad_norm": 1.5178589820861816, "learning_rate": 3.01816175152428e-06, "loss": 0.7741, "step": 18143 }, { "epoch": 0.6408788064762527, "grad_norm": 1.6434954404830933, "learning_rate": 3.0176366068741576e-06, "loss": 0.7816, "step": 18144 }, { "epoch": 0.6409141282799606, "grad_norm": 3.082606077194214, "learning_rate": 3.0171114881688353e-06, "loss": 0.7833, "step": 18145 }, { "epoch": 0.6409494500836685, "grad_norm": 1.6650395393371582, "learning_rate": 3.016586395415183e-06, "loss": 0.7684, "step": 18146 }, { "epoch": 0.6409847718873765, "grad_norm": 1.785321593284607, "learning_rate": 3.0160613286200757e-06, "loss": 0.8149, "step": 18147 }, { "epoch": 0.6410200936910844, "grad_norm": 1.6432380676269531, "learning_rate": 3.0155362877903853e-06, "loss": 0.7906, "step": 18148 }, { "epoch": 0.6410554154947923, "grad_norm": 1.7363823652267456, "learning_rate": 3.0150112729329815e-06, "loss": 0.795, "step": 18149 }, { "epoch": 0.6410907372985002, "grad_norm": 1.7653353214263916, "learning_rate": 3.014486284054739e-06, "loss": 0.7911, "step": 18150 }, { "epoch": 0.6411260591022081, "grad_norm": 1.6241021156311035, "learning_rate": 3.013961321162526e-06, "loss": 0.7694, "step": 18151 }, { "epoch": 0.641161380905916, "grad_norm": 1.657260775566101, "learning_rate": 3.013436384263213e-06, "loss": 0.7688, "step": 18152 }, { "epoch": 0.6411967027096239, "grad_norm": 1.6262394189834595, "learning_rate": 3.0129114733636723e-06, "loss": 0.7322, "step": 18153 }, { "epoch": 0.6412320245133317, "grad_norm": 1.573543906211853, "learning_rate": 3.0123865884707727e-06, "loss": 0.7606, "step": 18154 }, { "epoch": 0.6412673463170396, "grad_norm": 2.1322057247161865, "learning_rate": 3.0118617295913826e-06, "loss": 0.7921, "step": 18155 }, { "epoch": 0.6413026681207475, "grad_norm": 1.758040189743042, "learning_rate": 3.0113368967323738e-06, "loss": 0.7562, "step": 18156 }, { "epoch": 0.6413379899244555, "grad_norm": 1.7806506156921387, "learning_rate": 3.010812089900612e-06, "loss": 0.7538, "step": 18157 }, { "epoch": 0.6413733117281634, "grad_norm": 1.7289084196090698, "learning_rate": 3.0102873091029694e-06, "loss": 0.8106, "step": 18158 }, { "epoch": 0.6414086335318713, "grad_norm": 1.6607933044433594, "learning_rate": 3.0097625543463127e-06, "loss": 0.7935, "step": 18159 }, { "epoch": 0.6414439553355792, "grad_norm": 1.5985077619552612, "learning_rate": 3.0092378256375077e-06, "loss": 0.7689, "step": 18160 }, { "epoch": 0.6414792771392871, "grad_norm": 1.7839244604110718, "learning_rate": 3.0087131229834256e-06, "loss": 0.763, "step": 18161 }, { "epoch": 0.641514598942995, "grad_norm": 1.7452751398086548, "learning_rate": 3.0081884463909316e-06, "loss": 0.8032, "step": 18162 }, { "epoch": 0.6415499207467029, "grad_norm": 1.7152022123336792, "learning_rate": 3.007663795866892e-06, "loss": 0.7802, "step": 18163 }, { "epoch": 0.6415852425504108, "grad_norm": 1.755399227142334, "learning_rate": 3.0071391714181762e-06, "loss": 0.7778, "step": 18164 }, { "epoch": 0.6416205643541187, "grad_norm": 1.6363176107406616, "learning_rate": 3.006614573051646e-06, "loss": 0.7792, "step": 18165 }, { "epoch": 0.6416558861578266, "grad_norm": 1.675212025642395, "learning_rate": 3.0060900007741712e-06, "loss": 0.7791, "step": 18166 }, { "epoch": 0.6416912079615346, "grad_norm": 1.7008603811264038, "learning_rate": 3.005565454592614e-06, "loss": 0.7999, "step": 18167 }, { "epoch": 0.6417265297652425, "grad_norm": 1.867519736289978, "learning_rate": 3.005040934513842e-06, "loss": 0.7298, "step": 18168 }, { "epoch": 0.6417618515689504, "grad_norm": 1.6882073879241943, "learning_rate": 3.004516440544719e-06, "loss": 0.803, "step": 18169 }, { "epoch": 0.6417971733726583, "grad_norm": 1.666333556175232, "learning_rate": 3.00399197269211e-06, "loss": 0.7804, "step": 18170 }, { "epoch": 0.6418324951763662, "grad_norm": 2.7615976333618164, "learning_rate": 3.0034675309628785e-06, "loss": 0.7418, "step": 18171 }, { "epoch": 0.6418678169800741, "grad_norm": 2.8381035327911377, "learning_rate": 3.0029431153638886e-06, "loss": 0.7884, "step": 18172 }, { "epoch": 0.641903138783782, "grad_norm": 1.5090818405151367, "learning_rate": 3.0024187259020036e-06, "loss": 0.7664, "step": 18173 }, { "epoch": 0.6419384605874899, "grad_norm": 2.0711333751678467, "learning_rate": 3.0018943625840878e-06, "loss": 0.7835, "step": 18174 }, { "epoch": 0.6419737823911978, "grad_norm": 1.5756540298461914, "learning_rate": 3.001370025417003e-06, "loss": 0.7598, "step": 18175 }, { "epoch": 0.6420091041949058, "grad_norm": 1.5651406049728394, "learning_rate": 3.0008457144076098e-06, "loss": 0.7801, "step": 18176 }, { "epoch": 0.6420444259986137, "grad_norm": 1.4297834634780884, "learning_rate": 3.0003214295627738e-06, "loss": 0.7484, "step": 18177 }, { "epoch": 0.6420797478023216, "grad_norm": 1.716301679611206, "learning_rate": 2.9997971708893547e-06, "loss": 0.8125, "step": 18178 }, { "epoch": 0.6421150696060295, "grad_norm": 1.827019453048706, "learning_rate": 2.999272938394214e-06, "loss": 0.7918, "step": 18179 }, { "epoch": 0.6421503914097373, "grad_norm": 1.745708703994751, "learning_rate": 2.9987487320842136e-06, "loss": 0.7693, "step": 18180 }, { "epoch": 0.6421857132134452, "grad_norm": 1.6065181493759155, "learning_rate": 2.9982245519662123e-06, "loss": 0.7384, "step": 18181 }, { "epoch": 0.6422210350171531, "grad_norm": 1.7754616737365723, "learning_rate": 2.9977003980470744e-06, "loss": 0.7645, "step": 18182 }, { "epoch": 0.642256356820861, "grad_norm": 2.141796112060547, "learning_rate": 2.9971762703336547e-06, "loss": 0.7981, "step": 18183 }, { "epoch": 0.6422916786245689, "grad_norm": 1.779967188835144, "learning_rate": 2.9966521688328165e-06, "loss": 0.8116, "step": 18184 }, { "epoch": 0.6423270004282768, "grad_norm": 1.659313440322876, "learning_rate": 2.996128093551418e-06, "loss": 0.7822, "step": 18185 }, { "epoch": 0.6423623222319848, "grad_norm": 1.6707245111465454, "learning_rate": 2.9956040444963173e-06, "loss": 0.7978, "step": 18186 }, { "epoch": 0.6423976440356927, "grad_norm": 1.601757287979126, "learning_rate": 2.9950800216743756e-06, "loss": 0.7796, "step": 18187 }, { "epoch": 0.6424329658394006, "grad_norm": 1.6047730445861816, "learning_rate": 2.99455602509245e-06, "loss": 0.7681, "step": 18188 }, { "epoch": 0.6424682876431085, "grad_norm": 1.6213361024856567, "learning_rate": 2.9940320547573966e-06, "loss": 0.7886, "step": 18189 }, { "epoch": 0.6425036094468164, "grad_norm": 2.161170482635498, "learning_rate": 2.993508110676075e-06, "loss": 0.7388, "step": 18190 }, { "epoch": 0.6425389312505243, "grad_norm": 1.6527178287506104, "learning_rate": 2.9929841928553417e-06, "loss": 0.7396, "step": 18191 }, { "epoch": 0.6425742530542322, "grad_norm": 1.707777738571167, "learning_rate": 2.992460301302055e-06, "loss": 0.8163, "step": 18192 }, { "epoch": 0.6426095748579401, "grad_norm": 1.573752522468567, "learning_rate": 2.9919364360230703e-06, "loss": 0.8021, "step": 18193 }, { "epoch": 0.642644896661648, "grad_norm": 1.9452606439590454, "learning_rate": 2.991412597025243e-06, "loss": 0.8027, "step": 18194 }, { "epoch": 0.642680218465356, "grad_norm": 1.548362374305725, "learning_rate": 2.9908887843154313e-06, "loss": 0.7556, "step": 18195 }, { "epoch": 0.6427155402690639, "grad_norm": 1.8220608234405518, "learning_rate": 2.990364997900489e-06, "loss": 0.8268, "step": 18196 }, { "epoch": 0.6427508620727718, "grad_norm": 1.738351583480835, "learning_rate": 2.9898412377872715e-06, "loss": 0.7624, "step": 18197 }, { "epoch": 0.6427861838764797, "grad_norm": 1.564766526222229, "learning_rate": 2.989317503982635e-06, "loss": 0.7554, "step": 18198 }, { "epoch": 0.6428215056801876, "grad_norm": 1.8017171621322632, "learning_rate": 2.9887937964934327e-06, "loss": 0.7898, "step": 18199 }, { "epoch": 0.6428568274838955, "grad_norm": 1.6265571117401123, "learning_rate": 2.988270115326518e-06, "loss": 0.7333, "step": 18200 }, { "epoch": 0.6428921492876034, "grad_norm": 1.7132484912872314, "learning_rate": 2.9877464604887484e-06, "loss": 0.7509, "step": 18201 }, { "epoch": 0.6429274710913113, "grad_norm": 1.836429476737976, "learning_rate": 2.9872228319869733e-06, "loss": 0.7768, "step": 18202 }, { "epoch": 0.6429627928950192, "grad_norm": 1.5787684917449951, "learning_rate": 2.9866992298280477e-06, "loss": 0.7508, "step": 18203 }, { "epoch": 0.6429981146987271, "grad_norm": 1.780908226966858, "learning_rate": 2.9861756540188235e-06, "loss": 0.8139, "step": 18204 }, { "epoch": 0.643033436502435, "grad_norm": 1.6916316747665405, "learning_rate": 2.9856521045661542e-06, "loss": 0.8139, "step": 18205 }, { "epoch": 0.6430687583061429, "grad_norm": 1.631664752960205, "learning_rate": 2.9851285814768917e-06, "loss": 0.7778, "step": 18206 }, { "epoch": 0.6431040801098508, "grad_norm": 1.7442655563354492, "learning_rate": 2.9846050847578865e-06, "loss": 0.801, "step": 18207 }, { "epoch": 0.6431394019135587, "grad_norm": 1.8665266036987305, "learning_rate": 2.9840816144159922e-06, "loss": 0.7655, "step": 18208 }, { "epoch": 0.6431747237172666, "grad_norm": 1.819592833518982, "learning_rate": 2.9835581704580587e-06, "loss": 0.7635, "step": 18209 }, { "epoch": 0.6432100455209745, "grad_norm": 1.705003261566162, "learning_rate": 2.9830347528909354e-06, "loss": 0.749, "step": 18210 }, { "epoch": 0.6432453673246824, "grad_norm": 3.124519109725952, "learning_rate": 2.9825113617214763e-06, "loss": 0.7966, "step": 18211 }, { "epoch": 0.6432806891283903, "grad_norm": 1.7165710926055908, "learning_rate": 2.981987996956528e-06, "loss": 0.7702, "step": 18212 }, { "epoch": 0.6433160109320982, "grad_norm": 1.6829768419265747, "learning_rate": 2.981464658602941e-06, "loss": 0.741, "step": 18213 }, { "epoch": 0.6433513327358061, "grad_norm": 1.5789496898651123, "learning_rate": 2.980941346667566e-06, "loss": 0.7869, "step": 18214 }, { "epoch": 0.643386654539514, "grad_norm": 1.6282477378845215, "learning_rate": 2.9804180611572505e-06, "loss": 0.7796, "step": 18215 }, { "epoch": 0.643421976343222, "grad_norm": 1.8172919750213623, "learning_rate": 2.9798948020788444e-06, "loss": 0.7871, "step": 18216 }, { "epoch": 0.6434572981469299, "grad_norm": 1.61689293384552, "learning_rate": 2.979371569439196e-06, "loss": 0.772, "step": 18217 }, { "epoch": 0.6434926199506378, "grad_norm": 1.651757836341858, "learning_rate": 2.9788483632451504e-06, "loss": 0.7764, "step": 18218 }, { "epoch": 0.6435279417543457, "grad_norm": 1.761725902557373, "learning_rate": 2.978325183503561e-06, "loss": 0.7863, "step": 18219 }, { "epoch": 0.6435632635580536, "grad_norm": 1.6897090673446655, "learning_rate": 2.9778020302212684e-06, "loss": 0.7873, "step": 18220 }, { "epoch": 0.6435985853617615, "grad_norm": 1.6978583335876465, "learning_rate": 2.977278903405124e-06, "loss": 0.8269, "step": 18221 }, { "epoch": 0.6436339071654694, "grad_norm": 2.919581890106201, "learning_rate": 2.9767558030619735e-06, "loss": 0.7755, "step": 18222 }, { "epoch": 0.6436692289691773, "grad_norm": 1.7766811847686768, "learning_rate": 2.9762327291986614e-06, "loss": 0.7373, "step": 18223 }, { "epoch": 0.6437045507728852, "grad_norm": 2.013300657272339, "learning_rate": 2.975709681822036e-06, "loss": 0.8045, "step": 18224 }, { "epoch": 0.6437398725765932, "grad_norm": 1.6327452659606934, "learning_rate": 2.975186660938941e-06, "loss": 0.8053, "step": 18225 }, { "epoch": 0.6437751943803011, "grad_norm": 1.6752209663391113, "learning_rate": 2.974663666556222e-06, "loss": 0.8392, "step": 18226 }, { "epoch": 0.643810516184009, "grad_norm": 1.6832443475723267, "learning_rate": 2.9741406986807257e-06, "loss": 0.7734, "step": 18227 }, { "epoch": 0.6438458379877169, "grad_norm": 0.9337694048881531, "learning_rate": 2.973617757319294e-06, "loss": 0.58, "step": 18228 }, { "epoch": 0.6438811597914248, "grad_norm": 1.9302983283996582, "learning_rate": 2.9730948424787728e-06, "loss": 0.7806, "step": 18229 }, { "epoch": 0.6439164815951327, "grad_norm": 1.639438271522522, "learning_rate": 2.9725719541660047e-06, "loss": 0.7885, "step": 18230 }, { "epoch": 0.6439518033988406, "grad_norm": 1.800153136253357, "learning_rate": 2.972049092387833e-06, "loss": 0.8002, "step": 18231 }, { "epoch": 0.6439871252025484, "grad_norm": 1.5867908000946045, "learning_rate": 2.971526257151103e-06, "loss": 0.7738, "step": 18232 }, { "epoch": 0.6440224470062563, "grad_norm": 2.941148519515991, "learning_rate": 2.971003448462656e-06, "loss": 0.7453, "step": 18233 }, { "epoch": 0.6440577688099642, "grad_norm": 2.169497489929199, "learning_rate": 2.9704806663293338e-06, "loss": 0.7956, "step": 18234 }, { "epoch": 0.6440930906136721, "grad_norm": 1.5483977794647217, "learning_rate": 2.9699579107579797e-06, "loss": 0.7598, "step": 18235 }, { "epoch": 0.6441284124173801, "grad_norm": 1.7833906412124634, "learning_rate": 2.9694351817554347e-06, "loss": 0.767, "step": 18236 }, { "epoch": 0.644163734221088, "grad_norm": 0.8905993700027466, "learning_rate": 2.9689124793285396e-06, "loss": 0.5817, "step": 18237 }, { "epoch": 0.6441990560247959, "grad_norm": 1.7754290103912354, "learning_rate": 2.968389803484138e-06, "loss": 0.7713, "step": 18238 }, { "epoch": 0.6442343778285038, "grad_norm": 1.5954172611236572, "learning_rate": 2.9678671542290683e-06, "loss": 0.7761, "step": 18239 }, { "epoch": 0.6442696996322117, "grad_norm": 1.7267388105392456, "learning_rate": 2.9673445315701703e-06, "loss": 0.7762, "step": 18240 }, { "epoch": 0.6443050214359196, "grad_norm": 1.6367993354797363, "learning_rate": 2.9668219355142847e-06, "loss": 0.7816, "step": 18241 }, { "epoch": 0.6443403432396275, "grad_norm": 1.686031460762024, "learning_rate": 2.966299366068252e-06, "loss": 0.8001, "step": 18242 }, { "epoch": 0.6443756650433354, "grad_norm": 1.5496504306793213, "learning_rate": 2.965776823238911e-06, "loss": 0.7609, "step": 18243 }, { "epoch": 0.6444109868470433, "grad_norm": 2.4882588386535645, "learning_rate": 2.9652543070330997e-06, "loss": 0.7788, "step": 18244 }, { "epoch": 0.6444463086507513, "grad_norm": 1.5950026512145996, "learning_rate": 2.964731817457659e-06, "loss": 0.7406, "step": 18245 }, { "epoch": 0.6444816304544592, "grad_norm": 1.687277913093567, "learning_rate": 2.964209354519425e-06, "loss": 0.7995, "step": 18246 }, { "epoch": 0.6445169522581671, "grad_norm": 1.9241061210632324, "learning_rate": 2.963686918225235e-06, "loss": 0.795, "step": 18247 }, { "epoch": 0.644552274061875, "grad_norm": 1.6945217847824097, "learning_rate": 2.9631645085819293e-06, "loss": 0.8109, "step": 18248 }, { "epoch": 0.6445875958655829, "grad_norm": 1.711254596710205, "learning_rate": 2.9626421255963426e-06, "loss": 0.7838, "step": 18249 }, { "epoch": 0.6446229176692908, "grad_norm": 1.6502420902252197, "learning_rate": 2.962119769275314e-06, "loss": 0.7565, "step": 18250 }, { "epoch": 0.6446582394729987, "grad_norm": 1.907318115234375, "learning_rate": 2.9615974396256785e-06, "loss": 0.771, "step": 18251 }, { "epoch": 0.6446935612767066, "grad_norm": 1.4922696352005005, "learning_rate": 2.961075136654271e-06, "loss": 0.7434, "step": 18252 }, { "epoch": 0.6447288830804145, "grad_norm": 1.8309766054153442, "learning_rate": 2.9605528603679314e-06, "loss": 0.7918, "step": 18253 }, { "epoch": 0.6447642048841224, "grad_norm": 0.9947876930236816, "learning_rate": 2.960030610773491e-06, "loss": 0.5719, "step": 18254 }, { "epoch": 0.6447995266878304, "grad_norm": 1.833884835243225, "learning_rate": 2.9595083878777864e-06, "loss": 0.7449, "step": 18255 }, { "epoch": 0.6448348484915383, "grad_norm": 1.4816662073135376, "learning_rate": 2.958986191687655e-06, "loss": 0.755, "step": 18256 }, { "epoch": 0.6448701702952462, "grad_norm": 1.6919126510620117, "learning_rate": 2.958464022209926e-06, "loss": 0.7384, "step": 18257 }, { "epoch": 0.644905492098954, "grad_norm": 2.5043325424194336, "learning_rate": 2.957941879451437e-06, "loss": 0.7881, "step": 18258 }, { "epoch": 0.6449408139026619, "grad_norm": 1.8853939771652222, "learning_rate": 2.9574197634190192e-06, "loss": 0.7815, "step": 18259 }, { "epoch": 0.6449761357063698, "grad_norm": 1.7450617551803589, "learning_rate": 2.95689767411951e-06, "loss": 0.7464, "step": 18260 }, { "epoch": 0.6450114575100777, "grad_norm": 1.6329126358032227, "learning_rate": 2.9563756115597388e-06, "loss": 0.7803, "step": 18261 }, { "epoch": 0.6450467793137856, "grad_norm": 1.797391653060913, "learning_rate": 2.9558535757465386e-06, "loss": 0.8111, "step": 18262 }, { "epoch": 0.6450821011174935, "grad_norm": 0.8760268688201904, "learning_rate": 2.9553315666867432e-06, "loss": 0.5635, "step": 18263 }, { "epoch": 0.6451174229212014, "grad_norm": 1.7990049123764038, "learning_rate": 2.954809584387185e-06, "loss": 0.7523, "step": 18264 }, { "epoch": 0.6451527447249094, "grad_norm": 1.7660235166549683, "learning_rate": 2.954287628854693e-06, "loss": 0.8168, "step": 18265 }, { "epoch": 0.6451880665286173, "grad_norm": 1.9232208728790283, "learning_rate": 2.9537657000961006e-06, "loss": 0.8162, "step": 18266 }, { "epoch": 0.6452233883323252, "grad_norm": 1.6686453819274902, "learning_rate": 2.9532437981182384e-06, "loss": 0.8199, "step": 18267 }, { "epoch": 0.6452587101360331, "grad_norm": 1.8463406562805176, "learning_rate": 2.9527219229279354e-06, "loss": 0.7698, "step": 18268 }, { "epoch": 0.645294031939741, "grad_norm": 1.61854887008667, "learning_rate": 2.952200074532024e-06, "loss": 0.7696, "step": 18269 }, { "epoch": 0.6453293537434489, "grad_norm": 1.6356345415115356, "learning_rate": 2.9516782529373334e-06, "loss": 0.7746, "step": 18270 }, { "epoch": 0.6453646755471568, "grad_norm": 1.7437902688980103, "learning_rate": 2.9511564581506914e-06, "loss": 0.825, "step": 18271 }, { "epoch": 0.6453999973508647, "grad_norm": 1.7234704494476318, "learning_rate": 2.9506346901789294e-06, "loss": 0.7997, "step": 18272 }, { "epoch": 0.6454353191545726, "grad_norm": 2.098306894302368, "learning_rate": 2.9501129490288747e-06, "loss": 0.7375, "step": 18273 }, { "epoch": 0.6454706409582806, "grad_norm": 1.6166120767593384, "learning_rate": 2.9495912347073596e-06, "loss": 0.76, "step": 18274 }, { "epoch": 0.6455059627619885, "grad_norm": 1.6903802156448364, "learning_rate": 2.949069547221206e-06, "loss": 0.7571, "step": 18275 }, { "epoch": 0.6455412845656964, "grad_norm": 1.6799412965774536, "learning_rate": 2.9485478865772454e-06, "loss": 0.793, "step": 18276 }, { "epoch": 0.6455766063694043, "grad_norm": 1.6214361190795898, "learning_rate": 2.9480262527823043e-06, "loss": 0.7696, "step": 18277 }, { "epoch": 0.6456119281731122, "grad_norm": 1.5901885032653809, "learning_rate": 2.947504645843209e-06, "loss": 0.752, "step": 18278 }, { "epoch": 0.6456472499768201, "grad_norm": 1.709851861000061, "learning_rate": 2.9469830657667886e-06, "loss": 0.8187, "step": 18279 }, { "epoch": 0.645682571780528, "grad_norm": 2.0691275596618652, "learning_rate": 2.9464615125598674e-06, "loss": 0.7912, "step": 18280 }, { "epoch": 0.6457178935842359, "grad_norm": 2.290384531021118, "learning_rate": 2.9459399862292702e-06, "loss": 0.8177, "step": 18281 }, { "epoch": 0.6457532153879438, "grad_norm": 1.6205326318740845, "learning_rate": 2.9454184867818264e-06, "loss": 0.8018, "step": 18282 }, { "epoch": 0.6457885371916517, "grad_norm": 1.5793530941009521, "learning_rate": 2.9448970142243578e-06, "loss": 0.7222, "step": 18283 }, { "epoch": 0.6458238589953595, "grad_norm": 1.688000202178955, "learning_rate": 2.944375568563692e-06, "loss": 0.7781, "step": 18284 }, { "epoch": 0.6458591807990675, "grad_norm": 1.7413455247879028, "learning_rate": 2.943854149806652e-06, "loss": 0.7977, "step": 18285 }, { "epoch": 0.6458945026027754, "grad_norm": 0.8642389178276062, "learning_rate": 2.9433327579600614e-06, "loss": 0.5931, "step": 18286 }, { "epoch": 0.6459298244064833, "grad_norm": 1.7776182889938354, "learning_rate": 2.942811393030746e-06, "loss": 0.7963, "step": 18287 }, { "epoch": 0.6459651462101912, "grad_norm": 1.6585445404052734, "learning_rate": 2.9422900550255285e-06, "loss": 0.7983, "step": 18288 }, { "epoch": 0.6460004680138991, "grad_norm": 1.472519874572754, "learning_rate": 2.941768743951231e-06, "loss": 0.7298, "step": 18289 }, { "epoch": 0.646035789817607, "grad_norm": 1.858690619468689, "learning_rate": 2.941247459814679e-06, "loss": 0.7686, "step": 18290 }, { "epoch": 0.6460711116213149, "grad_norm": 2.308150053024292, "learning_rate": 2.9407262026226925e-06, "loss": 0.8088, "step": 18291 }, { "epoch": 0.6461064334250228, "grad_norm": 1.7402726411819458, "learning_rate": 2.940204972382093e-06, "loss": 0.7837, "step": 18292 }, { "epoch": 0.6461417552287307, "grad_norm": 1.4496312141418457, "learning_rate": 2.939683769099706e-06, "loss": 0.7181, "step": 18293 }, { "epoch": 0.6461770770324387, "grad_norm": 1.5850101709365845, "learning_rate": 2.9391625927823497e-06, "loss": 0.7378, "step": 18294 }, { "epoch": 0.6462123988361466, "grad_norm": 1.6617026329040527, "learning_rate": 2.938641443436846e-06, "loss": 0.7844, "step": 18295 }, { "epoch": 0.6462477206398545, "grad_norm": 1.4805943965911865, "learning_rate": 2.938120321070014e-06, "loss": 0.7865, "step": 18296 }, { "epoch": 0.6462830424435624, "grad_norm": 1.7167811393737793, "learning_rate": 2.9375992256886773e-06, "loss": 0.7757, "step": 18297 }, { "epoch": 0.6463183642472703, "grad_norm": 1.784045934677124, "learning_rate": 2.937078157299654e-06, "loss": 0.7844, "step": 18298 }, { "epoch": 0.6463536860509782, "grad_norm": 1.7555508613586426, "learning_rate": 2.936557115909763e-06, "loss": 0.7658, "step": 18299 }, { "epoch": 0.6463890078546861, "grad_norm": 1.568293571472168, "learning_rate": 2.9360361015258253e-06, "loss": 0.7777, "step": 18300 }, { "epoch": 0.646424329658394, "grad_norm": 1.5610668659210205, "learning_rate": 2.9355151141546594e-06, "loss": 0.7525, "step": 18301 }, { "epoch": 0.6464596514621019, "grad_norm": 1.6215393543243408, "learning_rate": 2.934994153803083e-06, "loss": 0.771, "step": 18302 }, { "epoch": 0.6464949732658098, "grad_norm": 1.7617565393447876, "learning_rate": 2.9344732204779153e-06, "loss": 0.7819, "step": 18303 }, { "epoch": 0.6465302950695178, "grad_norm": 2.4112582206726074, "learning_rate": 2.933952314185974e-06, "loss": 0.8098, "step": 18304 }, { "epoch": 0.6465656168732257, "grad_norm": 1.7397425174713135, "learning_rate": 2.9334314349340754e-06, "loss": 0.7734, "step": 18305 }, { "epoch": 0.6466009386769336, "grad_norm": 2.116330623626709, "learning_rate": 2.9329105827290395e-06, "loss": 0.7957, "step": 18306 }, { "epoch": 0.6466362604806415, "grad_norm": 1.5511177778244019, "learning_rate": 2.9323897575776795e-06, "loss": 0.771, "step": 18307 }, { "epoch": 0.6466715822843494, "grad_norm": 1.7415399551391602, "learning_rate": 2.9318689594868144e-06, "loss": 0.8105, "step": 18308 }, { "epoch": 0.6467069040880573, "grad_norm": 2.029313564300537, "learning_rate": 2.9313481884632598e-06, "loss": 0.7862, "step": 18309 }, { "epoch": 0.6467422258917651, "grad_norm": 1.4939993619918823, "learning_rate": 2.930827444513831e-06, "loss": 0.7266, "step": 18310 }, { "epoch": 0.646777547695473, "grad_norm": 1.9308090209960938, "learning_rate": 2.9303067276453456e-06, "loss": 0.8024, "step": 18311 }, { "epoch": 0.6468128694991809, "grad_norm": 1.745793104171753, "learning_rate": 2.9297860378646148e-06, "loss": 0.788, "step": 18312 }, { "epoch": 0.6468481913028888, "grad_norm": 1.902633547782898, "learning_rate": 2.929265375178455e-06, "loss": 0.7717, "step": 18313 }, { "epoch": 0.6468835131065968, "grad_norm": 1.747689127922058, "learning_rate": 2.9287447395936818e-06, "loss": 0.7779, "step": 18314 }, { "epoch": 0.6469188349103047, "grad_norm": 2.0029807090759277, "learning_rate": 2.9282241311171067e-06, "loss": 0.7501, "step": 18315 }, { "epoch": 0.6469541567140126, "grad_norm": 1.8346786499023438, "learning_rate": 2.927703549755545e-06, "loss": 0.7734, "step": 18316 }, { "epoch": 0.6469894785177205, "grad_norm": 1.8515115976333618, "learning_rate": 2.927182995515809e-06, "loss": 0.7545, "step": 18317 }, { "epoch": 0.6470248003214284, "grad_norm": 1.736720323562622, "learning_rate": 2.926662468404714e-06, "loss": 0.7819, "step": 18318 }, { "epoch": 0.6470601221251363, "grad_norm": 1.5613601207733154, "learning_rate": 2.9261419684290705e-06, "loss": 0.771, "step": 18319 }, { "epoch": 0.6470954439288442, "grad_norm": 1.7016910314559937, "learning_rate": 2.92562149559569e-06, "loss": 0.748, "step": 18320 }, { "epoch": 0.6471307657325521, "grad_norm": 1.6237484216690063, "learning_rate": 2.9251010499113863e-06, "loss": 0.7733, "step": 18321 }, { "epoch": 0.64716608753626, "grad_norm": 1.5701323747634888, "learning_rate": 2.9245806313829703e-06, "loss": 0.7445, "step": 18322 }, { "epoch": 0.647201409339968, "grad_norm": 1.5870951414108276, "learning_rate": 2.924060240017252e-06, "loss": 0.7834, "step": 18323 }, { "epoch": 0.6472367311436759, "grad_norm": 2.819794178009033, "learning_rate": 2.923539875821044e-06, "loss": 0.7722, "step": 18324 }, { "epoch": 0.6472720529473838, "grad_norm": 2.3209197521209717, "learning_rate": 2.9230195388011553e-06, "loss": 0.7759, "step": 18325 }, { "epoch": 0.6473073747510917, "grad_norm": 1.7049996852874756, "learning_rate": 2.9224992289643954e-06, "loss": 0.7835, "step": 18326 }, { "epoch": 0.6473426965547996, "grad_norm": 1.5710177421569824, "learning_rate": 2.921978946317576e-06, "loss": 0.777, "step": 18327 }, { "epoch": 0.6473780183585075, "grad_norm": 1.6854051351547241, "learning_rate": 2.9214586908675068e-06, "loss": 0.756, "step": 18328 }, { "epoch": 0.6474133401622154, "grad_norm": 1.6010656356811523, "learning_rate": 2.9209384626209936e-06, "loss": 0.78, "step": 18329 }, { "epoch": 0.6474486619659233, "grad_norm": 1.701311707496643, "learning_rate": 2.920418261584849e-06, "loss": 0.7787, "step": 18330 }, { "epoch": 0.6474839837696312, "grad_norm": 1.6684539318084717, "learning_rate": 2.9198980877658776e-06, "loss": 0.8074, "step": 18331 }, { "epoch": 0.6475193055733391, "grad_norm": 1.6489536762237549, "learning_rate": 2.91937794117089e-06, "loss": 0.7605, "step": 18332 }, { "epoch": 0.647554627377047, "grad_norm": 1.7138118743896484, "learning_rate": 2.9188578218066916e-06, "loss": 0.773, "step": 18333 }, { "epoch": 0.647589949180755, "grad_norm": 1.6119681596755981, "learning_rate": 2.918337729680091e-06, "loss": 0.7848, "step": 18334 }, { "epoch": 0.6476252709844629, "grad_norm": 1.8511418104171753, "learning_rate": 2.917817664797896e-06, "loss": 0.8071, "step": 18335 }, { "epoch": 0.6476605927881707, "grad_norm": 1.5571174621582031, "learning_rate": 2.9172976271669108e-06, "loss": 0.7691, "step": 18336 }, { "epoch": 0.6476959145918786, "grad_norm": 1.7816365957260132, "learning_rate": 2.9167776167939425e-06, "loss": 0.763, "step": 18337 }, { "epoch": 0.6477312363955865, "grad_norm": 1.635352611541748, "learning_rate": 2.9162576336857997e-06, "loss": 0.7583, "step": 18338 }, { "epoch": 0.6477665581992944, "grad_norm": 1.7618986368179321, "learning_rate": 2.9157376778492823e-06, "loss": 0.7246, "step": 18339 }, { "epoch": 0.6478018800030023, "grad_norm": 1.7655671834945679, "learning_rate": 2.9152177492911993e-06, "loss": 0.7948, "step": 18340 }, { "epoch": 0.6478372018067102, "grad_norm": 1.6673552989959717, "learning_rate": 2.9146978480183557e-06, "loss": 0.772, "step": 18341 }, { "epoch": 0.6478725236104181, "grad_norm": 1.6151543855667114, "learning_rate": 2.914177974037553e-06, "loss": 0.8189, "step": 18342 }, { "epoch": 0.647907845414126, "grad_norm": 1.6247447729110718, "learning_rate": 2.913658127355597e-06, "loss": 0.784, "step": 18343 }, { "epoch": 0.647943167217834, "grad_norm": 1.5324939489364624, "learning_rate": 2.913138307979293e-06, "loss": 0.7333, "step": 18344 }, { "epoch": 0.6479784890215419, "grad_norm": 1.7477166652679443, "learning_rate": 2.912618515915441e-06, "loss": 0.784, "step": 18345 }, { "epoch": 0.6480138108252498, "grad_norm": 1.7775137424468994, "learning_rate": 2.9120987511708447e-06, "loss": 0.7795, "step": 18346 }, { "epoch": 0.6480491326289577, "grad_norm": 1.6878365278244019, "learning_rate": 2.9115790137523103e-06, "loss": 0.7524, "step": 18347 }, { "epoch": 0.6480844544326656, "grad_norm": 1.639164924621582, "learning_rate": 2.9110593036666368e-06, "loss": 0.7621, "step": 18348 }, { "epoch": 0.6481197762363735, "grad_norm": 1.8967441320419312, "learning_rate": 2.9105396209206245e-06, "loss": 0.8018, "step": 18349 }, { "epoch": 0.6481550980400814, "grad_norm": 1.4688392877578735, "learning_rate": 2.910019965521077e-06, "loss": 0.7501, "step": 18350 }, { "epoch": 0.6481904198437893, "grad_norm": 1.6784240007400513, "learning_rate": 2.9095003374747964e-06, "loss": 0.7906, "step": 18351 }, { "epoch": 0.6482257416474972, "grad_norm": 1.8044692277908325, "learning_rate": 2.9089807367885813e-06, "loss": 0.7622, "step": 18352 }, { "epoch": 0.6482610634512052, "grad_norm": 1.6624456644058228, "learning_rate": 2.9084611634692334e-06, "loss": 0.7598, "step": 18353 }, { "epoch": 0.6482963852549131, "grad_norm": 1.6727324724197388, "learning_rate": 2.9079416175235543e-06, "loss": 0.74, "step": 18354 }, { "epoch": 0.648331707058621, "grad_norm": 2.5912091732025146, "learning_rate": 2.90742209895834e-06, "loss": 0.7862, "step": 18355 }, { "epoch": 0.6483670288623289, "grad_norm": 1.5831636190414429, "learning_rate": 2.906902607780392e-06, "loss": 0.7795, "step": 18356 }, { "epoch": 0.6484023506660368, "grad_norm": 1.6403980255126953, "learning_rate": 2.906383143996511e-06, "loss": 0.7813, "step": 18357 }, { "epoch": 0.6484376724697447, "grad_norm": 2.2215123176574707, "learning_rate": 2.905863707613492e-06, "loss": 0.7573, "step": 18358 }, { "epoch": 0.6484729942734526, "grad_norm": 1.8603774309158325, "learning_rate": 2.9053442986381355e-06, "loss": 0.7606, "step": 18359 }, { "epoch": 0.6485083160771605, "grad_norm": 1.6634135246276855, "learning_rate": 2.9048249170772407e-06, "loss": 0.8101, "step": 18360 }, { "epoch": 0.6485436378808684, "grad_norm": 1.589881420135498, "learning_rate": 2.9043055629376015e-06, "loss": 0.7736, "step": 18361 }, { "epoch": 0.6485789596845762, "grad_norm": 2.0465362071990967, "learning_rate": 2.9037862362260173e-06, "loss": 0.8021, "step": 18362 }, { "epoch": 0.6486142814882842, "grad_norm": 1.706921935081482, "learning_rate": 2.9032669369492873e-06, "loss": 0.7828, "step": 18363 }, { "epoch": 0.6486496032919921, "grad_norm": 1.8041296005249023, "learning_rate": 2.9027476651142027e-06, "loss": 0.7801, "step": 18364 }, { "epoch": 0.6486849250957, "grad_norm": 1.760561466217041, "learning_rate": 2.9022284207275624e-06, "loss": 0.7894, "step": 18365 }, { "epoch": 0.6487202468994079, "grad_norm": 1.7078245878219604, "learning_rate": 2.9017092037961645e-06, "loss": 0.7585, "step": 18366 }, { "epoch": 0.6487555687031158, "grad_norm": 1.8968387842178345, "learning_rate": 2.9011900143268025e-06, "loss": 0.7911, "step": 18367 }, { "epoch": 0.6487908905068237, "grad_norm": 1.7865136861801147, "learning_rate": 2.9006708523262684e-06, "loss": 0.7828, "step": 18368 }, { "epoch": 0.6488262123105316, "grad_norm": 1.7404628992080688, "learning_rate": 2.9001517178013595e-06, "loss": 0.7669, "step": 18369 }, { "epoch": 0.6488615341142395, "grad_norm": 1.7802958488464355, "learning_rate": 2.8996326107588726e-06, "loss": 0.7873, "step": 18370 }, { "epoch": 0.6488968559179474, "grad_norm": 1.6025927066802979, "learning_rate": 2.899113531205597e-06, "loss": 0.7879, "step": 18371 }, { "epoch": 0.6489321777216553, "grad_norm": 0.9585731029510498, "learning_rate": 2.898594479148329e-06, "loss": 0.5496, "step": 18372 }, { "epoch": 0.6489674995253633, "grad_norm": 1.9207775592803955, "learning_rate": 2.8980754545938628e-06, "loss": 0.776, "step": 18373 }, { "epoch": 0.6490028213290712, "grad_norm": 1.615684986114502, "learning_rate": 2.897556457548988e-06, "loss": 0.7636, "step": 18374 }, { "epoch": 0.6490381431327791, "grad_norm": 1.6130783557891846, "learning_rate": 2.8970374880205e-06, "loss": 0.797, "step": 18375 }, { "epoch": 0.649073464936487, "grad_norm": 1.7615470886230469, "learning_rate": 2.8965185460151914e-06, "loss": 0.7671, "step": 18376 }, { "epoch": 0.6491087867401949, "grad_norm": 2.9066169261932373, "learning_rate": 2.8959996315398496e-06, "loss": 0.7717, "step": 18377 }, { "epoch": 0.6491441085439028, "grad_norm": 1.705305576324463, "learning_rate": 2.8954807446012707e-06, "loss": 0.7593, "step": 18378 }, { "epoch": 0.6491794303476107, "grad_norm": 1.617594599723816, "learning_rate": 2.8949618852062457e-06, "loss": 0.786, "step": 18379 }, { "epoch": 0.6492147521513186, "grad_norm": 3.527698278427124, "learning_rate": 2.894443053361562e-06, "loss": 0.7439, "step": 18380 }, { "epoch": 0.6492500739550265, "grad_norm": 1.7574068307876587, "learning_rate": 2.8939242490740118e-06, "loss": 0.7978, "step": 18381 }, { "epoch": 0.6492853957587345, "grad_norm": 1.667848825454712, "learning_rate": 2.8934054723503846e-06, "loss": 0.7703, "step": 18382 }, { "epoch": 0.6493207175624424, "grad_norm": 1.5772978067398071, "learning_rate": 2.8928867231974727e-06, "loss": 0.793, "step": 18383 }, { "epoch": 0.6493560393661503, "grad_norm": 1.906990647315979, "learning_rate": 2.8923680016220613e-06, "loss": 0.8128, "step": 18384 }, { "epoch": 0.6493913611698582, "grad_norm": 1.6477595567703247, "learning_rate": 2.891849307630944e-06, "loss": 0.7679, "step": 18385 }, { "epoch": 0.6494266829735661, "grad_norm": 1.7385094165802002, "learning_rate": 2.891330641230906e-06, "loss": 0.7773, "step": 18386 }, { "epoch": 0.649462004777274, "grad_norm": 1.734106183052063, "learning_rate": 2.8908120024287347e-06, "loss": 0.782, "step": 18387 }, { "epoch": 0.6494973265809818, "grad_norm": 1.6486506462097168, "learning_rate": 2.8902933912312188e-06, "loss": 0.7526, "step": 18388 }, { "epoch": 0.6495326483846897, "grad_norm": 1.8323237895965576, "learning_rate": 2.889774807645148e-06, "loss": 0.8004, "step": 18389 }, { "epoch": 0.6495679701883976, "grad_norm": 1.5380339622497559, "learning_rate": 2.889256251677307e-06, "loss": 0.7605, "step": 18390 }, { "epoch": 0.6496032919921055, "grad_norm": 1.5716229677200317, "learning_rate": 2.888737723334483e-06, "loss": 0.7742, "step": 18391 }, { "epoch": 0.6496386137958134, "grad_norm": 1.6399375200271606, "learning_rate": 2.888219222623463e-06, "loss": 0.7902, "step": 18392 }, { "epoch": 0.6496739355995214, "grad_norm": 1.710222601890564, "learning_rate": 2.8877007495510347e-06, "loss": 0.7852, "step": 18393 }, { "epoch": 0.6497092574032293, "grad_norm": 1.5225664377212524, "learning_rate": 2.887182304123979e-06, "loss": 0.7803, "step": 18394 }, { "epoch": 0.6497445792069372, "grad_norm": 1.7060760259628296, "learning_rate": 2.8866638863490846e-06, "loss": 0.7979, "step": 18395 }, { "epoch": 0.6497799010106451, "grad_norm": 1.6639816761016846, "learning_rate": 2.886145496233139e-06, "loss": 0.7645, "step": 18396 }, { "epoch": 0.649815222814353, "grad_norm": 1.5500080585479736, "learning_rate": 2.885627133782921e-06, "loss": 0.7761, "step": 18397 }, { "epoch": 0.6498505446180609, "grad_norm": 1.7216496467590332, "learning_rate": 2.8851087990052173e-06, "loss": 0.7968, "step": 18398 }, { "epoch": 0.6498858664217688, "grad_norm": 1.8020389080047607, "learning_rate": 2.8845904919068146e-06, "loss": 0.8294, "step": 18399 }, { "epoch": 0.6499211882254767, "grad_norm": 1.796897053718567, "learning_rate": 2.884072212494492e-06, "loss": 0.806, "step": 18400 }, { "epoch": 0.6499565100291846, "grad_norm": 1.5837153196334839, "learning_rate": 2.8835539607750344e-06, "loss": 0.7689, "step": 18401 }, { "epoch": 0.6499918318328926, "grad_norm": 1.8389050960540771, "learning_rate": 2.8830357367552262e-06, "loss": 0.8308, "step": 18402 }, { "epoch": 0.6500271536366005, "grad_norm": 1.61794912815094, "learning_rate": 2.882517540441847e-06, "loss": 0.7984, "step": 18403 }, { "epoch": 0.6500624754403084, "grad_norm": 1.5607812404632568, "learning_rate": 2.8819993718416823e-06, "loss": 0.7599, "step": 18404 }, { "epoch": 0.6500977972440163, "grad_norm": 1.643194317817688, "learning_rate": 2.881481230961509e-06, "loss": 0.7823, "step": 18405 }, { "epoch": 0.6501331190477242, "grad_norm": 1.6266781091690063, "learning_rate": 2.880963117808114e-06, "loss": 0.7628, "step": 18406 }, { "epoch": 0.6501684408514321, "grad_norm": 1.7400221824645996, "learning_rate": 2.8804450323882726e-06, "loss": 0.8144, "step": 18407 }, { "epoch": 0.65020376265514, "grad_norm": 1.755679726600647, "learning_rate": 2.8799269747087683e-06, "loss": 0.7951, "step": 18408 }, { "epoch": 0.6502390844588479, "grad_norm": 1.7024316787719727, "learning_rate": 2.8794089447763834e-06, "loss": 0.7787, "step": 18409 }, { "epoch": 0.6502744062625558, "grad_norm": 1.5936884880065918, "learning_rate": 2.8788909425978935e-06, "loss": 0.7851, "step": 18410 }, { "epoch": 0.6503097280662637, "grad_norm": 1.5980433225631714, "learning_rate": 2.8783729681800805e-06, "loss": 0.8058, "step": 18411 }, { "epoch": 0.6503450498699717, "grad_norm": 1.5646648406982422, "learning_rate": 2.8778550215297253e-06, "loss": 0.763, "step": 18412 }, { "epoch": 0.6503803716736796, "grad_norm": 1.6827099323272705, "learning_rate": 2.8773371026536016e-06, "loss": 0.7864, "step": 18413 }, { "epoch": 0.6504156934773874, "grad_norm": 1.7389558553695679, "learning_rate": 2.876819211558492e-06, "loss": 0.7703, "step": 18414 }, { "epoch": 0.6504510152810953, "grad_norm": 1.8600239753723145, "learning_rate": 2.8763013482511746e-06, "loss": 0.8057, "step": 18415 }, { "epoch": 0.6504863370848032, "grad_norm": 1.6951656341552734, "learning_rate": 2.8757835127384247e-06, "loss": 0.7963, "step": 18416 }, { "epoch": 0.6505216588885111, "grad_norm": 1.7994413375854492, "learning_rate": 2.8752657050270206e-06, "loss": 0.7815, "step": 18417 }, { "epoch": 0.650556980692219, "grad_norm": 1.7440149784088135, "learning_rate": 2.874747925123742e-06, "loss": 0.7705, "step": 18418 }, { "epoch": 0.6505923024959269, "grad_norm": 1.5227915048599243, "learning_rate": 2.8742301730353607e-06, "loss": 0.7755, "step": 18419 }, { "epoch": 0.6506276242996348, "grad_norm": 1.584746241569519, "learning_rate": 2.8737124487686552e-06, "loss": 0.7767, "step": 18420 }, { "epoch": 0.6506629461033427, "grad_norm": 1.7992072105407715, "learning_rate": 2.8731947523304033e-06, "loss": 0.7616, "step": 18421 }, { "epoch": 0.6506982679070507, "grad_norm": 1.806650996208191, "learning_rate": 2.8726770837273786e-06, "loss": 0.8001, "step": 18422 }, { "epoch": 0.6507335897107586, "grad_norm": 2.0473663806915283, "learning_rate": 2.8721594429663547e-06, "loss": 0.7929, "step": 18423 }, { "epoch": 0.6507689115144665, "grad_norm": 1.6429170370101929, "learning_rate": 2.871641830054108e-06, "loss": 0.8003, "step": 18424 }, { "epoch": 0.6508042333181744, "grad_norm": 1.6226775646209717, "learning_rate": 2.8711242449974154e-06, "loss": 0.7752, "step": 18425 }, { "epoch": 0.6508395551218823, "grad_norm": 1.9635573625564575, "learning_rate": 2.870606687803046e-06, "loss": 0.7721, "step": 18426 }, { "epoch": 0.6508748769255902, "grad_norm": 2.1032254695892334, "learning_rate": 2.870089158477776e-06, "loss": 0.7879, "step": 18427 }, { "epoch": 0.6509101987292981, "grad_norm": 1.9422874450683594, "learning_rate": 2.8695716570283803e-06, "loss": 0.7671, "step": 18428 }, { "epoch": 0.650945520533006, "grad_norm": 1.72543466091156, "learning_rate": 2.869054183461629e-06, "loss": 0.7702, "step": 18429 }, { "epoch": 0.6509808423367139, "grad_norm": 1.6536331176757812, "learning_rate": 2.8685367377842952e-06, "loss": 0.7751, "step": 18430 }, { "epoch": 0.6510161641404218, "grad_norm": 1.5958988666534424, "learning_rate": 2.868019320003154e-06, "loss": 0.7947, "step": 18431 }, { "epoch": 0.6510514859441298, "grad_norm": 1.677335500717163, "learning_rate": 2.867501930124974e-06, "loss": 0.7707, "step": 18432 }, { "epoch": 0.6510868077478377, "grad_norm": 1.8952583074569702, "learning_rate": 2.866984568156527e-06, "loss": 0.7859, "step": 18433 }, { "epoch": 0.6511221295515456, "grad_norm": 1.5187526941299438, "learning_rate": 2.8664672341045873e-06, "loss": 0.7613, "step": 18434 }, { "epoch": 0.6511574513552535, "grad_norm": 1.6613483428955078, "learning_rate": 2.8659499279759216e-06, "loss": 0.7667, "step": 18435 }, { "epoch": 0.6511927731589614, "grad_norm": 1.8679022789001465, "learning_rate": 2.865432649777302e-06, "loss": 0.8087, "step": 18436 }, { "epoch": 0.6512280949626693, "grad_norm": 1.4907246828079224, "learning_rate": 2.8649153995155012e-06, "loss": 0.736, "step": 18437 }, { "epoch": 0.6512634167663772, "grad_norm": 1.9547107219696045, "learning_rate": 2.864398177197283e-06, "loss": 0.7743, "step": 18438 }, { "epoch": 0.6512987385700851, "grad_norm": 1.7619545459747314, "learning_rate": 2.863880982829421e-06, "loss": 0.7765, "step": 18439 }, { "epoch": 0.6513340603737929, "grad_norm": 1.56527578830719, "learning_rate": 2.863363816418685e-06, "loss": 0.7499, "step": 18440 }, { "epoch": 0.6513693821775008, "grad_norm": 1.6834697723388672, "learning_rate": 2.8628466779718413e-06, "loss": 0.7519, "step": 18441 }, { "epoch": 0.6514047039812088, "grad_norm": 1.6985949277877808, "learning_rate": 2.8623295674956565e-06, "loss": 0.799, "step": 18442 }, { "epoch": 0.6514400257849167, "grad_norm": 1.6523603200912476, "learning_rate": 2.861812484996901e-06, "loss": 0.7638, "step": 18443 }, { "epoch": 0.6514753475886246, "grad_norm": 1.5640487670898438, "learning_rate": 2.8612954304823435e-06, "loss": 0.78, "step": 18444 }, { "epoch": 0.6515106693923325, "grad_norm": 1.6893550157546997, "learning_rate": 2.860778403958747e-06, "loss": 0.7896, "step": 18445 }, { "epoch": 0.6515459911960404, "grad_norm": 2.078348398208618, "learning_rate": 2.8602614054328813e-06, "loss": 0.7704, "step": 18446 }, { "epoch": 0.6515813129997483, "grad_norm": 1.5656859874725342, "learning_rate": 2.8597444349115137e-06, "loss": 0.7807, "step": 18447 }, { "epoch": 0.6516166348034562, "grad_norm": 2.4688003063201904, "learning_rate": 2.8592274924014075e-06, "loss": 0.7942, "step": 18448 }, { "epoch": 0.6516519566071641, "grad_norm": 1.5609956979751587, "learning_rate": 2.8587105779093284e-06, "loss": 0.7728, "step": 18449 }, { "epoch": 0.651687278410872, "grad_norm": 1.5983763933181763, "learning_rate": 2.858193691442045e-06, "loss": 0.7691, "step": 18450 }, { "epoch": 0.65172260021458, "grad_norm": 1.6533259153366089, "learning_rate": 2.8576768330063175e-06, "loss": 0.7701, "step": 18451 }, { "epoch": 0.6517579220182879, "grad_norm": 1.7316391468048096, "learning_rate": 2.857160002608914e-06, "loss": 0.8059, "step": 18452 }, { "epoch": 0.6517932438219958, "grad_norm": 1.571604609489441, "learning_rate": 2.856643200256597e-06, "loss": 0.7505, "step": 18453 }, { "epoch": 0.6518285656257037, "grad_norm": 1.524282455444336, "learning_rate": 2.8561264259561326e-06, "loss": 0.7767, "step": 18454 }, { "epoch": 0.6518638874294116, "grad_norm": 1.7157337665557861, "learning_rate": 2.8556096797142807e-06, "loss": 0.7948, "step": 18455 }, { "epoch": 0.6518992092331195, "grad_norm": 1.6417087316513062, "learning_rate": 2.8550929615378064e-06, "loss": 0.7879, "step": 18456 }, { "epoch": 0.6519345310368274, "grad_norm": 1.5965120792388916, "learning_rate": 2.854576271433474e-06, "loss": 0.7884, "step": 18457 }, { "epoch": 0.6519698528405353, "grad_norm": 1.7948061227798462, "learning_rate": 2.854059609408042e-06, "loss": 0.7873, "step": 18458 }, { "epoch": 0.6520051746442432, "grad_norm": 1.6632657051086426, "learning_rate": 2.853542975468277e-06, "loss": 0.789, "step": 18459 }, { "epoch": 0.6520404964479511, "grad_norm": 1.7216899394989014, "learning_rate": 2.8530263696209377e-06, "loss": 0.768, "step": 18460 }, { "epoch": 0.6520758182516591, "grad_norm": 1.6981981992721558, "learning_rate": 2.8525097918727834e-06, "loss": 0.81, "step": 18461 }, { "epoch": 0.652111140055367, "grad_norm": 1.6509953737258911, "learning_rate": 2.8519932422305774e-06, "loss": 0.7769, "step": 18462 }, { "epoch": 0.6521464618590749, "grad_norm": 1.7239313125610352, "learning_rate": 2.8514767207010807e-06, "loss": 0.7923, "step": 18463 }, { "epoch": 0.6521817836627828, "grad_norm": 1.5787914991378784, "learning_rate": 2.8509602272910542e-06, "loss": 0.7928, "step": 18464 }, { "epoch": 0.6522171054664907, "grad_norm": 1.5097686052322388, "learning_rate": 2.8504437620072544e-06, "loss": 0.7937, "step": 18465 }, { "epoch": 0.6522524272701985, "grad_norm": 1.7924118041992188, "learning_rate": 2.849927324856442e-06, "loss": 0.8035, "step": 18466 }, { "epoch": 0.6522877490739064, "grad_norm": 1.6964489221572876, "learning_rate": 2.849410915845379e-06, "loss": 0.7833, "step": 18467 }, { "epoch": 0.6523230708776143, "grad_norm": 1.7607426643371582, "learning_rate": 2.84889453498082e-06, "loss": 0.7698, "step": 18468 }, { "epoch": 0.6523583926813222, "grad_norm": 1.6950913667678833, "learning_rate": 2.8483781822695246e-06, "loss": 0.8085, "step": 18469 }, { "epoch": 0.6523937144850301, "grad_norm": 1.5607274770736694, "learning_rate": 2.8478618577182538e-06, "loss": 0.7717, "step": 18470 }, { "epoch": 0.652429036288738, "grad_norm": 1.7002208232879639, "learning_rate": 2.8473455613337596e-06, "loss": 0.7774, "step": 18471 }, { "epoch": 0.652464358092446, "grad_norm": 1.5621824264526367, "learning_rate": 2.8468292931228027e-06, "loss": 0.7794, "step": 18472 }, { "epoch": 0.6524996798961539, "grad_norm": 1.7377086877822876, "learning_rate": 2.8463130530921402e-06, "loss": 0.7857, "step": 18473 }, { "epoch": 0.6525350016998618, "grad_norm": 1.846887230873108, "learning_rate": 2.8457968412485265e-06, "loss": 0.7636, "step": 18474 }, { "epoch": 0.6525703235035697, "grad_norm": 1.712091088294983, "learning_rate": 2.845280657598719e-06, "loss": 0.7998, "step": 18475 }, { "epoch": 0.6526056453072776, "grad_norm": 2.0550355911254883, "learning_rate": 2.8447645021494747e-06, "loss": 0.7767, "step": 18476 }, { "epoch": 0.6526409671109855, "grad_norm": 1.0044879913330078, "learning_rate": 2.8442483749075456e-06, "loss": 0.6097, "step": 18477 }, { "epoch": 0.6526762889146934, "grad_norm": 1.5775556564331055, "learning_rate": 2.84373227587969e-06, "loss": 0.7454, "step": 18478 }, { "epoch": 0.6527116107184013, "grad_norm": 1.8881088495254517, "learning_rate": 2.8432162050726597e-06, "loss": 0.8148, "step": 18479 }, { "epoch": 0.6527469325221092, "grad_norm": 1.5569164752960205, "learning_rate": 2.8427001624932123e-06, "loss": 0.7627, "step": 18480 }, { "epoch": 0.6527822543258172, "grad_norm": 1.5781618356704712, "learning_rate": 2.842184148148097e-06, "loss": 0.7708, "step": 18481 }, { "epoch": 0.6528175761295251, "grad_norm": 3.694711446762085, "learning_rate": 2.8416681620440713e-06, "loss": 0.7602, "step": 18482 }, { "epoch": 0.652852897933233, "grad_norm": 1.5348906517028809, "learning_rate": 2.8411522041878874e-06, "loss": 0.7674, "step": 18483 }, { "epoch": 0.6528882197369409, "grad_norm": 1.7289597988128662, "learning_rate": 2.8406362745862963e-06, "loss": 0.8232, "step": 18484 }, { "epoch": 0.6529235415406488, "grad_norm": 1.0905284881591797, "learning_rate": 2.840120373246052e-06, "loss": 0.6057, "step": 18485 }, { "epoch": 0.6529588633443567, "grad_norm": 1.6459676027297974, "learning_rate": 2.8396045001739083e-06, "loss": 0.7637, "step": 18486 }, { "epoch": 0.6529941851480646, "grad_norm": 1.636134386062622, "learning_rate": 2.8390886553766117e-06, "loss": 0.7984, "step": 18487 }, { "epoch": 0.6530295069517725, "grad_norm": 1.7377595901489258, "learning_rate": 2.838572838860918e-06, "loss": 0.7968, "step": 18488 }, { "epoch": 0.6530648287554804, "grad_norm": 1.6819409132003784, "learning_rate": 2.838057050633578e-06, "loss": 0.7752, "step": 18489 }, { "epoch": 0.6531001505591884, "grad_norm": 1.7050926685333252, "learning_rate": 2.837541290701339e-06, "loss": 0.7829, "step": 18490 }, { "epoch": 0.6531354723628963, "grad_norm": 1.6988201141357422, "learning_rate": 2.8370255590709527e-06, "loss": 0.7718, "step": 18491 }, { "epoch": 0.6531707941666041, "grad_norm": 1.6693410873413086, "learning_rate": 2.8365098557491716e-06, "loss": 0.7896, "step": 18492 }, { "epoch": 0.653206115970312, "grad_norm": 1.7230228185653687, "learning_rate": 2.8359941807427404e-06, "loss": 0.7599, "step": 18493 }, { "epoch": 0.6532414377740199, "grad_norm": 1.6465660333633423, "learning_rate": 2.835478534058411e-06, "loss": 0.7827, "step": 18494 }, { "epoch": 0.6532767595777278, "grad_norm": 1.574436068534851, "learning_rate": 2.8349629157029342e-06, "loss": 0.7874, "step": 18495 }, { "epoch": 0.6533120813814357, "grad_norm": 1.7732903957366943, "learning_rate": 2.834447325683055e-06, "loss": 0.7803, "step": 18496 }, { "epoch": 0.6533474031851436, "grad_norm": 1.5704681873321533, "learning_rate": 2.8339317640055202e-06, "loss": 0.8018, "step": 18497 }, { "epoch": 0.6533827249888515, "grad_norm": 1.7814292907714844, "learning_rate": 2.8334162306770797e-06, "loss": 0.7361, "step": 18498 }, { "epoch": 0.6534180467925594, "grad_norm": 1.8552801609039307, "learning_rate": 2.832900725704482e-06, "loss": 0.7872, "step": 18499 }, { "epoch": 0.6534533685962673, "grad_norm": 1.6910836696624756, "learning_rate": 2.8323852490944704e-06, "loss": 0.8054, "step": 18500 }, { "epoch": 0.6534886903999753, "grad_norm": 1.649247169494629, "learning_rate": 2.8318698008537928e-06, "loss": 0.7892, "step": 18501 }, { "epoch": 0.6535240122036832, "grad_norm": 1.8327107429504395, "learning_rate": 2.8313543809891988e-06, "loss": 0.79, "step": 18502 }, { "epoch": 0.6535593340073911, "grad_norm": 1.6951580047607422, "learning_rate": 2.8308389895074273e-06, "loss": 0.7266, "step": 18503 }, { "epoch": 0.653594655811099, "grad_norm": 1.5787969827651978, "learning_rate": 2.8303236264152284e-06, "loss": 0.7892, "step": 18504 }, { "epoch": 0.6536299776148069, "grad_norm": 1.7416151762008667, "learning_rate": 2.829808291719348e-06, "loss": 0.7944, "step": 18505 }, { "epoch": 0.6536652994185148, "grad_norm": 1.5589263439178467, "learning_rate": 2.829292985426526e-06, "loss": 0.7536, "step": 18506 }, { "epoch": 0.6537006212222227, "grad_norm": 1.5870959758758545, "learning_rate": 2.8287777075435106e-06, "loss": 0.7778, "step": 18507 }, { "epoch": 0.6537359430259306, "grad_norm": 1.7116833925247192, "learning_rate": 2.8282624580770458e-06, "loss": 0.791, "step": 18508 }, { "epoch": 0.6537712648296385, "grad_norm": 1.5938382148742676, "learning_rate": 2.827747237033871e-06, "loss": 0.7789, "step": 18509 }, { "epoch": 0.6538065866333465, "grad_norm": 1.8937873840332031, "learning_rate": 2.827232044420733e-06, "loss": 0.7608, "step": 18510 }, { "epoch": 0.6538419084370544, "grad_norm": 2.023540496826172, "learning_rate": 2.8267168802443735e-06, "loss": 0.7616, "step": 18511 }, { "epoch": 0.6538772302407623, "grad_norm": 1.6107444763183594, "learning_rate": 2.8262017445115364e-06, "loss": 0.7634, "step": 18512 }, { "epoch": 0.6539125520444702, "grad_norm": 1.5710384845733643, "learning_rate": 2.8256866372289602e-06, "loss": 0.75, "step": 18513 }, { "epoch": 0.6539478738481781, "grad_norm": 1.7381911277770996, "learning_rate": 2.825171558403389e-06, "loss": 0.7915, "step": 18514 }, { "epoch": 0.653983195651886, "grad_norm": 1.608977198600769, "learning_rate": 2.8246565080415677e-06, "loss": 0.7802, "step": 18515 }, { "epoch": 0.6540185174555939, "grad_norm": 2.299508810043335, "learning_rate": 2.824141486150229e-06, "loss": 0.736, "step": 18516 }, { "epoch": 0.6540538392593018, "grad_norm": 1.6700587272644043, "learning_rate": 2.8236264927361173e-06, "loss": 0.7658, "step": 18517 }, { "epoch": 0.6540891610630096, "grad_norm": 1.6271249055862427, "learning_rate": 2.8231115278059753e-06, "loss": 0.776, "step": 18518 }, { "epoch": 0.6541244828667175, "grad_norm": 1.5766775608062744, "learning_rate": 2.8225965913665377e-06, "loss": 0.7351, "step": 18519 }, { "epoch": 0.6541598046704254, "grad_norm": 1.5619417428970337, "learning_rate": 2.8220816834245475e-06, "loss": 0.7648, "step": 18520 }, { "epoch": 0.6541951264741334, "grad_norm": 1.599561095237732, "learning_rate": 2.8215668039867423e-06, "loss": 0.7462, "step": 18521 }, { "epoch": 0.6542304482778413, "grad_norm": 1.62209153175354, "learning_rate": 2.8210519530598633e-06, "loss": 0.7999, "step": 18522 }, { "epoch": 0.6542657700815492, "grad_norm": 1.6634715795516968, "learning_rate": 2.8205371306506447e-06, "loss": 0.7747, "step": 18523 }, { "epoch": 0.6543010918852571, "grad_norm": 1.8278937339782715, "learning_rate": 2.8200223367658264e-06, "loss": 0.8023, "step": 18524 }, { "epoch": 0.654336413688965, "grad_norm": 1.7849277257919312, "learning_rate": 2.8195075714121483e-06, "loss": 0.7774, "step": 18525 }, { "epoch": 0.6543717354926729, "grad_norm": 1.5804210901260376, "learning_rate": 2.8189928345963435e-06, "loss": 0.7597, "step": 18526 }, { "epoch": 0.6544070572963808, "grad_norm": 1.8992072343826294, "learning_rate": 2.81847812632515e-06, "loss": 0.7747, "step": 18527 }, { "epoch": 0.6544423791000887, "grad_norm": 1.8456766605377197, "learning_rate": 2.8179634466053064e-06, "loss": 0.8186, "step": 18528 }, { "epoch": 0.6544777009037966, "grad_norm": 1.7508900165557861, "learning_rate": 2.8174487954435454e-06, "loss": 0.799, "step": 18529 }, { "epoch": 0.6545130227075046, "grad_norm": 1.6263357400894165, "learning_rate": 2.8169341728466043e-06, "loss": 0.789, "step": 18530 }, { "epoch": 0.6545483445112125, "grad_norm": 1.5261688232421875, "learning_rate": 2.81641957882122e-06, "loss": 0.7608, "step": 18531 }, { "epoch": 0.6545836663149204, "grad_norm": 1.6237199306488037, "learning_rate": 2.8159050133741246e-06, "loss": 0.7874, "step": 18532 }, { "epoch": 0.6546189881186283, "grad_norm": 1.7182462215423584, "learning_rate": 2.8153904765120555e-06, "loss": 0.7979, "step": 18533 }, { "epoch": 0.6546543099223362, "grad_norm": 2.106459856033325, "learning_rate": 2.8148759682417428e-06, "loss": 0.7814, "step": 18534 }, { "epoch": 0.6546896317260441, "grad_norm": 1.683117389678955, "learning_rate": 2.8143614885699246e-06, "loss": 0.7702, "step": 18535 }, { "epoch": 0.654724953529752, "grad_norm": 1.0340368747711182, "learning_rate": 2.813847037503331e-06, "loss": 0.5576, "step": 18536 }, { "epoch": 0.6547602753334599, "grad_norm": 1.54826819896698, "learning_rate": 2.813332615048696e-06, "loss": 0.7795, "step": 18537 }, { "epoch": 0.6547955971371678, "grad_norm": 1.6265828609466553, "learning_rate": 2.8128182212127547e-06, "loss": 0.7538, "step": 18538 }, { "epoch": 0.6548309189408758, "grad_norm": 1.619424819946289, "learning_rate": 2.8123038560022355e-06, "loss": 0.7556, "step": 18539 }, { "epoch": 0.6548662407445837, "grad_norm": 1.6290862560272217, "learning_rate": 2.811789519423872e-06, "loss": 0.7682, "step": 18540 }, { "epoch": 0.6549015625482916, "grad_norm": 1.6666392087936401, "learning_rate": 2.8112752114843983e-06, "loss": 0.7699, "step": 18541 }, { "epoch": 0.6549368843519995, "grad_norm": 1.6574022769927979, "learning_rate": 2.810760932190541e-06, "loss": 0.749, "step": 18542 }, { "epoch": 0.6549722061557074, "grad_norm": 1.7469760179519653, "learning_rate": 2.8102466815490332e-06, "loss": 0.7488, "step": 18543 }, { "epoch": 0.6550075279594152, "grad_norm": 1.8949729204177856, "learning_rate": 2.8097324595666063e-06, "loss": 0.7563, "step": 18544 }, { "epoch": 0.6550428497631231, "grad_norm": 1.9994643926620483, "learning_rate": 2.8092182662499888e-06, "loss": 0.7574, "step": 18545 }, { "epoch": 0.655078171566831, "grad_norm": 1.7690255641937256, "learning_rate": 2.8087041016059096e-06, "loss": 0.7943, "step": 18546 }, { "epoch": 0.6551134933705389, "grad_norm": 1.7192076444625854, "learning_rate": 2.8081899656411014e-06, "loss": 0.7825, "step": 18547 }, { "epoch": 0.6551488151742468, "grad_norm": 1.6291674375534058, "learning_rate": 2.8076758583622897e-06, "loss": 0.764, "step": 18548 }, { "epoch": 0.6551841369779547, "grad_norm": 1.5910090208053589, "learning_rate": 2.8071617797762036e-06, "loss": 0.7523, "step": 18549 }, { "epoch": 0.6552194587816627, "grad_norm": 1.701146125793457, "learning_rate": 2.806647729889574e-06, "loss": 0.7889, "step": 18550 }, { "epoch": 0.6552547805853706, "grad_norm": 1.956441044807434, "learning_rate": 2.8061337087091267e-06, "loss": 0.7938, "step": 18551 }, { "epoch": 0.6552901023890785, "grad_norm": 1.651228904724121, "learning_rate": 2.8056197162415867e-06, "loss": 0.7741, "step": 18552 }, { "epoch": 0.6553254241927864, "grad_norm": 1.7255334854125977, "learning_rate": 2.8051057524936836e-06, "loss": 0.7938, "step": 18553 }, { "epoch": 0.6553607459964943, "grad_norm": 1.7160805463790894, "learning_rate": 2.804591817472146e-06, "loss": 0.7694, "step": 18554 }, { "epoch": 0.6553960678002022, "grad_norm": 1.6465084552764893, "learning_rate": 2.8040779111836956e-06, "loss": 0.7501, "step": 18555 }, { "epoch": 0.6554313896039101, "grad_norm": 1.5453271865844727, "learning_rate": 2.8035640336350613e-06, "loss": 0.7433, "step": 18556 }, { "epoch": 0.655466711407618, "grad_norm": 1.5427714586257935, "learning_rate": 2.8030501848329693e-06, "loss": 0.7529, "step": 18557 }, { "epoch": 0.6555020332113259, "grad_norm": 1.581799030303955, "learning_rate": 2.802536364784142e-06, "loss": 0.7664, "step": 18558 }, { "epoch": 0.6555373550150339, "grad_norm": 1.7622487545013428, "learning_rate": 2.8020225734953055e-06, "loss": 0.8052, "step": 18559 }, { "epoch": 0.6555726768187418, "grad_norm": 1.727463722229004, "learning_rate": 2.8015088109731864e-06, "loss": 0.7651, "step": 18560 }, { "epoch": 0.6556079986224497, "grad_norm": 1.9103294610977173, "learning_rate": 2.800995077224504e-06, "loss": 0.7822, "step": 18561 }, { "epoch": 0.6556433204261576, "grad_norm": 1.7735003232955933, "learning_rate": 2.800481372255986e-06, "loss": 0.7574, "step": 18562 }, { "epoch": 0.6556786422298655, "grad_norm": 1.6464529037475586, "learning_rate": 2.799967696074356e-06, "loss": 0.7523, "step": 18563 }, { "epoch": 0.6557139640335734, "grad_norm": 1.7389320135116577, "learning_rate": 2.7994540486863332e-06, "loss": 0.8004, "step": 18564 }, { "epoch": 0.6557492858372813, "grad_norm": 1.6031981706619263, "learning_rate": 2.798940430098641e-06, "loss": 0.7844, "step": 18565 }, { "epoch": 0.6557846076409892, "grad_norm": 1.6504266262054443, "learning_rate": 2.798426840318006e-06, "loss": 0.7809, "step": 18566 }, { "epoch": 0.6558199294446971, "grad_norm": 1.9116313457489014, "learning_rate": 2.797913279351144e-06, "loss": 0.7912, "step": 18567 }, { "epoch": 0.655855251248405, "grad_norm": 1.6113730669021606, "learning_rate": 2.7973997472047797e-06, "loss": 0.7734, "step": 18568 }, { "epoch": 0.655890573052113, "grad_norm": 1.6935282945632935, "learning_rate": 2.796886243885633e-06, "loss": 0.7918, "step": 18569 }, { "epoch": 0.6559258948558209, "grad_norm": 1.6478698253631592, "learning_rate": 2.7963727694004296e-06, "loss": 0.7609, "step": 18570 }, { "epoch": 0.6559612166595287, "grad_norm": 1.733087420463562, "learning_rate": 2.79585932375588e-06, "loss": 0.7464, "step": 18571 }, { "epoch": 0.6559965384632366, "grad_norm": 1.5358469486236572, "learning_rate": 2.7953459069587096e-06, "loss": 0.7533, "step": 18572 }, { "epoch": 0.6560318602669445, "grad_norm": 1.7781212329864502, "learning_rate": 2.79483251901564e-06, "loss": 0.7555, "step": 18573 }, { "epoch": 0.6560671820706524, "grad_norm": 1.8315180540084839, "learning_rate": 2.7943191599333865e-06, "loss": 0.7498, "step": 18574 }, { "epoch": 0.6561025038743603, "grad_norm": 1.6440948247909546, "learning_rate": 2.7938058297186678e-06, "loss": 0.7486, "step": 18575 }, { "epoch": 0.6561378256780682, "grad_norm": 1.7889050245285034, "learning_rate": 2.7932925283782066e-06, "loss": 0.8045, "step": 18576 }, { "epoch": 0.6561731474817761, "grad_norm": 1.7401831150054932, "learning_rate": 2.7927792559187156e-06, "loss": 0.7828, "step": 18577 }, { "epoch": 0.656208469285484, "grad_norm": 1.6469509601593018, "learning_rate": 2.792266012346915e-06, "loss": 0.8176, "step": 18578 }, { "epoch": 0.656243791089192, "grad_norm": 1.7226537466049194, "learning_rate": 2.7917527976695214e-06, "loss": 0.7867, "step": 18579 }, { "epoch": 0.6562791128928999, "grad_norm": 1.6722651720046997, "learning_rate": 2.7912396118932545e-06, "loss": 0.7719, "step": 18580 }, { "epoch": 0.6563144346966078, "grad_norm": 1.4893244504928589, "learning_rate": 2.790726455024826e-06, "loss": 0.7836, "step": 18581 }, { "epoch": 0.6563497565003157, "grad_norm": 1.741713285446167, "learning_rate": 2.790213327070954e-06, "loss": 0.8128, "step": 18582 }, { "epoch": 0.6563850783040236, "grad_norm": 1.6719462871551514, "learning_rate": 2.789700228038358e-06, "loss": 0.7936, "step": 18583 }, { "epoch": 0.6564204001077315, "grad_norm": 1.749570369720459, "learning_rate": 2.789187157933747e-06, "loss": 0.774, "step": 18584 }, { "epoch": 0.6564557219114394, "grad_norm": 1.8146950006484985, "learning_rate": 2.788674116763839e-06, "loss": 0.8167, "step": 18585 }, { "epoch": 0.6564910437151473, "grad_norm": 1.8085384368896484, "learning_rate": 2.78816110453535e-06, "loss": 0.7909, "step": 18586 }, { "epoch": 0.6565263655188552, "grad_norm": 1.7608619928359985, "learning_rate": 2.787648121254991e-06, "loss": 0.7931, "step": 18587 }, { "epoch": 0.6565616873225631, "grad_norm": 1.565718173980713, "learning_rate": 2.78713516692948e-06, "loss": 0.8076, "step": 18588 }, { "epoch": 0.6565970091262711, "grad_norm": 1.767008662223816, "learning_rate": 2.786622241565525e-06, "loss": 0.7864, "step": 18589 }, { "epoch": 0.656632330929979, "grad_norm": 1.4643760919570923, "learning_rate": 2.786109345169845e-06, "loss": 0.7759, "step": 18590 }, { "epoch": 0.6566676527336869, "grad_norm": 1.8268811702728271, "learning_rate": 2.785596477749147e-06, "loss": 0.7457, "step": 18591 }, { "epoch": 0.6567029745373948, "grad_norm": 2.4589896202087402, "learning_rate": 2.7850836393101465e-06, "loss": 0.7719, "step": 18592 }, { "epoch": 0.6567382963411027, "grad_norm": 1.6445417404174805, "learning_rate": 2.7845708298595575e-06, "loss": 0.8253, "step": 18593 }, { "epoch": 0.6567736181448106, "grad_norm": 1.8438010215759277, "learning_rate": 2.784058049404086e-06, "loss": 0.7928, "step": 18594 }, { "epoch": 0.6568089399485185, "grad_norm": 1.7569431066513062, "learning_rate": 2.7835452979504464e-06, "loss": 0.8089, "step": 18595 }, { "epoch": 0.6568442617522264, "grad_norm": 1.552100658416748, "learning_rate": 2.7830325755053517e-06, "loss": 0.8035, "step": 18596 }, { "epoch": 0.6568795835559342, "grad_norm": 1.7103053331375122, "learning_rate": 2.782519882075508e-06, "loss": 0.7906, "step": 18597 }, { "epoch": 0.6569149053596421, "grad_norm": 1.5073875188827515, "learning_rate": 2.7820072176676267e-06, "loss": 0.7424, "step": 18598 }, { "epoch": 0.65695022716335, "grad_norm": 1.6163127422332764, "learning_rate": 2.78149458228842e-06, "loss": 0.7891, "step": 18599 }, { "epoch": 0.656985548967058, "grad_norm": 1.5635254383087158, "learning_rate": 2.7809819759445934e-06, "loss": 0.77, "step": 18600 }, { "epoch": 0.6570208707707659, "grad_norm": 1.6703567504882812, "learning_rate": 2.780469398642858e-06, "loss": 0.7803, "step": 18601 }, { "epoch": 0.6570561925744738, "grad_norm": 1.5650933980941772, "learning_rate": 2.779956850389923e-06, "loss": 0.7741, "step": 18602 }, { "epoch": 0.6570915143781817, "grad_norm": 1.5044182538986206, "learning_rate": 2.779444331192494e-06, "loss": 0.7511, "step": 18603 }, { "epoch": 0.6571268361818896, "grad_norm": 1.6877062320709229, "learning_rate": 2.7789318410572797e-06, "loss": 0.8011, "step": 18604 }, { "epoch": 0.6571621579855975, "grad_norm": 1.696721076965332, "learning_rate": 2.778419379990991e-06, "loss": 0.8137, "step": 18605 }, { "epoch": 0.6571974797893054, "grad_norm": 1.8706141710281372, "learning_rate": 2.7779069480003286e-06, "loss": 0.811, "step": 18606 }, { "epoch": 0.6572328015930133, "grad_norm": 1.626978874206543, "learning_rate": 2.7773945450920046e-06, "loss": 0.7585, "step": 18607 }, { "epoch": 0.6572681233967212, "grad_norm": 1.651395320892334, "learning_rate": 2.7768821712727216e-06, "loss": 0.806, "step": 18608 }, { "epoch": 0.6573034452004292, "grad_norm": 1.0060639381408691, "learning_rate": 2.776369826549189e-06, "loss": 0.5989, "step": 18609 }, { "epoch": 0.6573387670041371, "grad_norm": 1.7810759544372559, "learning_rate": 2.7758575109281073e-06, "loss": 0.798, "step": 18610 }, { "epoch": 0.657374088807845, "grad_norm": 1.5483542680740356, "learning_rate": 2.775345224416185e-06, "loss": 0.7811, "step": 18611 }, { "epoch": 0.6574094106115529, "grad_norm": 1.6855251789093018, "learning_rate": 2.774832967020128e-06, "loss": 0.8053, "step": 18612 }, { "epoch": 0.6574447324152608, "grad_norm": 1.5398372411727905, "learning_rate": 2.7743207387466366e-06, "loss": 0.7999, "step": 18613 }, { "epoch": 0.6574800542189687, "grad_norm": 2.400672435760498, "learning_rate": 2.7738085396024172e-06, "loss": 0.7952, "step": 18614 }, { "epoch": 0.6575153760226766, "grad_norm": 1.8306162357330322, "learning_rate": 2.773296369594175e-06, "loss": 0.8184, "step": 18615 }, { "epoch": 0.6575506978263845, "grad_norm": 1.5768095254898071, "learning_rate": 2.7727842287286096e-06, "loss": 0.8017, "step": 18616 }, { "epoch": 0.6575860196300924, "grad_norm": 1.5674458742141724, "learning_rate": 2.7722721170124256e-06, "loss": 0.7739, "step": 18617 }, { "epoch": 0.6576213414338004, "grad_norm": 1.5721057653427124, "learning_rate": 2.7717600344523266e-06, "loss": 0.7838, "step": 18618 }, { "epoch": 0.6576566632375083, "grad_norm": 1.671640157699585, "learning_rate": 2.771247981055012e-06, "loss": 0.7538, "step": 18619 }, { "epoch": 0.6576919850412162, "grad_norm": 1.7812970876693726, "learning_rate": 2.7707359568271853e-06, "loss": 0.7734, "step": 18620 }, { "epoch": 0.6577273068449241, "grad_norm": 1.6037721633911133, "learning_rate": 2.770223961775549e-06, "loss": 0.7765, "step": 18621 }, { "epoch": 0.657762628648632, "grad_norm": 1.8140418529510498, "learning_rate": 2.7697119959068e-06, "loss": 0.8082, "step": 18622 }, { "epoch": 0.6577979504523398, "grad_norm": 1.626758337020874, "learning_rate": 2.7692000592276414e-06, "loss": 0.8049, "step": 18623 }, { "epoch": 0.6578332722560477, "grad_norm": 1.479361891746521, "learning_rate": 2.768688151744775e-06, "loss": 0.7649, "step": 18624 }, { "epoch": 0.6578685940597556, "grad_norm": 1.7720893621444702, "learning_rate": 2.7681762734648985e-06, "loss": 0.7698, "step": 18625 }, { "epoch": 0.6579039158634635, "grad_norm": 1.5515986680984497, "learning_rate": 2.7676644243947096e-06, "loss": 0.7673, "step": 18626 }, { "epoch": 0.6579392376671714, "grad_norm": 1.6771018505096436, "learning_rate": 2.7671526045409093e-06, "loss": 0.7636, "step": 18627 }, { "epoch": 0.6579745594708793, "grad_norm": 1.6074297428131104, "learning_rate": 2.7666408139101975e-06, "loss": 0.7745, "step": 18628 }, { "epoch": 0.6580098812745873, "grad_norm": 1.8629670143127441, "learning_rate": 2.7661290525092693e-06, "loss": 0.8011, "step": 18629 }, { "epoch": 0.6580452030782952, "grad_norm": 1.7728232145309448, "learning_rate": 2.7656173203448237e-06, "loss": 0.7729, "step": 18630 }, { "epoch": 0.6580805248820031, "grad_norm": 1.6905090808868408, "learning_rate": 2.7651056174235613e-06, "loss": 0.7912, "step": 18631 }, { "epoch": 0.658115846685711, "grad_norm": 1.5058610439300537, "learning_rate": 2.764593943752174e-06, "loss": 0.7776, "step": 18632 }, { "epoch": 0.6581511684894189, "grad_norm": 1.9832935333251953, "learning_rate": 2.7640822993373607e-06, "loss": 0.7776, "step": 18633 }, { "epoch": 0.6581864902931268, "grad_norm": 5.152900218963623, "learning_rate": 2.7635706841858205e-06, "loss": 0.7779, "step": 18634 }, { "epoch": 0.6582218120968347, "grad_norm": 1.5698703527450562, "learning_rate": 2.763059098304245e-06, "loss": 0.7884, "step": 18635 }, { "epoch": 0.6582571339005426, "grad_norm": 1.7286901473999023, "learning_rate": 2.762547541699331e-06, "loss": 0.7789, "step": 18636 }, { "epoch": 0.6582924557042505, "grad_norm": 1.7388321161270142, "learning_rate": 2.7620360143777737e-06, "loss": 0.7828, "step": 18637 }, { "epoch": 0.6583277775079585, "grad_norm": 1.679689884185791, "learning_rate": 2.761524516346271e-06, "loss": 0.7777, "step": 18638 }, { "epoch": 0.6583630993116664, "grad_norm": 1.6078596115112305, "learning_rate": 2.761013047611513e-06, "loss": 0.7897, "step": 18639 }, { "epoch": 0.6583984211153743, "grad_norm": 1.7127883434295654, "learning_rate": 2.7605016081801944e-06, "loss": 0.77, "step": 18640 }, { "epoch": 0.6584337429190822, "grad_norm": 1.5546458959579468, "learning_rate": 2.759990198059013e-06, "loss": 0.7665, "step": 18641 }, { "epoch": 0.6584690647227901, "grad_norm": 1.8354822397232056, "learning_rate": 2.7594788172546555e-06, "loss": 0.7501, "step": 18642 }, { "epoch": 0.658504386526498, "grad_norm": 1.6125972270965576, "learning_rate": 2.7589674657738186e-06, "loss": 0.7944, "step": 18643 }, { "epoch": 0.6585397083302059, "grad_norm": 1.693144679069519, "learning_rate": 2.7584561436231983e-06, "loss": 0.7907, "step": 18644 }, { "epoch": 0.6585750301339138, "grad_norm": 1.5648154020309448, "learning_rate": 2.7579448508094786e-06, "loss": 0.7808, "step": 18645 }, { "epoch": 0.6586103519376217, "grad_norm": 1.7752777338027954, "learning_rate": 2.7574335873393553e-06, "loss": 0.7997, "step": 18646 }, { "epoch": 0.6586456737413297, "grad_norm": 1.617406964302063, "learning_rate": 2.75692235321952e-06, "loss": 0.7772, "step": 18647 }, { "epoch": 0.6586809955450376, "grad_norm": 2.0085289478302, "learning_rate": 2.756411148456665e-06, "loss": 0.7873, "step": 18648 }, { "epoch": 0.6587163173487454, "grad_norm": 1.602562665939331, "learning_rate": 2.7558999730574766e-06, "loss": 0.7863, "step": 18649 }, { "epoch": 0.6587516391524533, "grad_norm": 1.5602670907974243, "learning_rate": 2.7553888270286476e-06, "loss": 0.7569, "step": 18650 }, { "epoch": 0.6587869609561612, "grad_norm": 1.6081420183181763, "learning_rate": 2.7548777103768703e-06, "loss": 0.7702, "step": 18651 }, { "epoch": 0.6588222827598691, "grad_norm": 1.7947109937667847, "learning_rate": 2.7543666231088293e-06, "loss": 0.771, "step": 18652 }, { "epoch": 0.658857604563577, "grad_norm": 1.6991852521896362, "learning_rate": 2.7538555652312155e-06, "loss": 0.7522, "step": 18653 }, { "epoch": 0.6588929263672849, "grad_norm": 1.589349627494812, "learning_rate": 2.75334453675072e-06, "loss": 0.7743, "step": 18654 }, { "epoch": 0.6589282481709928, "grad_norm": 1.8569543361663818, "learning_rate": 2.7528335376740266e-06, "loss": 0.8382, "step": 18655 }, { "epoch": 0.6589635699747007, "grad_norm": 1.720686912536621, "learning_rate": 2.7523225680078258e-06, "loss": 0.7737, "step": 18656 }, { "epoch": 0.6589988917784086, "grad_norm": 1.7039868831634521, "learning_rate": 2.751811627758807e-06, "loss": 0.7422, "step": 18657 }, { "epoch": 0.6590342135821166, "grad_norm": 1.8188917636871338, "learning_rate": 2.7513007169336525e-06, "loss": 0.7858, "step": 18658 }, { "epoch": 0.6590695353858245, "grad_norm": 1.7757452726364136, "learning_rate": 2.750789835539052e-06, "loss": 0.7676, "step": 18659 }, { "epoch": 0.6591048571895324, "grad_norm": 1.883754014968872, "learning_rate": 2.750278983581693e-06, "loss": 0.7731, "step": 18660 }, { "epoch": 0.6591401789932403, "grad_norm": 1.7114814519882202, "learning_rate": 2.749768161068258e-06, "loss": 0.7581, "step": 18661 }, { "epoch": 0.6591755007969482, "grad_norm": 1.8177300691604614, "learning_rate": 2.749257368005436e-06, "loss": 0.7876, "step": 18662 }, { "epoch": 0.6592108226006561, "grad_norm": 1.7819478511810303, "learning_rate": 2.7487466043999085e-06, "loss": 0.7782, "step": 18663 }, { "epoch": 0.659246144404364, "grad_norm": 1.631838321685791, "learning_rate": 2.7482358702583646e-06, "loss": 0.7568, "step": 18664 }, { "epoch": 0.6592814662080719, "grad_norm": 1.7584282159805298, "learning_rate": 2.7477251655874835e-06, "loss": 0.7899, "step": 18665 }, { "epoch": 0.6593167880117798, "grad_norm": 1.7313575744628906, "learning_rate": 2.747214490393953e-06, "loss": 0.782, "step": 18666 }, { "epoch": 0.6593521098154878, "grad_norm": 1.669976830482483, "learning_rate": 2.7467038446844575e-06, "loss": 0.8065, "step": 18667 }, { "epoch": 0.6593874316191957, "grad_norm": 1.6630239486694336, "learning_rate": 2.7461932284656763e-06, "loss": 0.7747, "step": 18668 }, { "epoch": 0.6594227534229036, "grad_norm": 1.661944031715393, "learning_rate": 2.7456826417442945e-06, "loss": 0.7826, "step": 18669 }, { "epoch": 0.6594580752266115, "grad_norm": 1.6867952346801758, "learning_rate": 2.7451720845269963e-06, "loss": 0.7925, "step": 18670 }, { "epoch": 0.6594933970303194, "grad_norm": 1.7354309558868408, "learning_rate": 2.7446615568204603e-06, "loss": 0.7958, "step": 18671 }, { "epoch": 0.6595287188340273, "grad_norm": 2.455152750015259, "learning_rate": 2.7441510586313704e-06, "loss": 0.7788, "step": 18672 }, { "epoch": 0.6595640406377352, "grad_norm": 1.8332432508468628, "learning_rate": 2.7436405899664087e-06, "loss": 0.8102, "step": 18673 }, { "epoch": 0.6595993624414431, "grad_norm": 1.6823978424072266, "learning_rate": 2.7431301508322526e-06, "loss": 0.7574, "step": 18674 }, { "epoch": 0.6596346842451509, "grad_norm": 2.0440146923065186, "learning_rate": 2.742619741235585e-06, "loss": 0.7698, "step": 18675 }, { "epoch": 0.6596700060488588, "grad_norm": 1.9996072053909302, "learning_rate": 2.7421093611830887e-06, "loss": 0.7604, "step": 18676 }, { "epoch": 0.6597053278525667, "grad_norm": 1.6362797021865845, "learning_rate": 2.7415990106814377e-06, "loss": 0.7595, "step": 18677 }, { "epoch": 0.6597406496562747, "grad_norm": 1.6537009477615356, "learning_rate": 2.7410886897373145e-06, "loss": 0.8183, "step": 18678 }, { "epoch": 0.6597759714599826, "grad_norm": 1.5681411027908325, "learning_rate": 2.7405783983574e-06, "loss": 0.7682, "step": 18679 }, { "epoch": 0.6598112932636905, "grad_norm": 1.843540906906128, "learning_rate": 2.740068136548369e-06, "loss": 0.7516, "step": 18680 }, { "epoch": 0.6598466150673984, "grad_norm": 1.6608365774154663, "learning_rate": 2.7395579043169027e-06, "loss": 0.8162, "step": 18681 }, { "epoch": 0.6598819368711063, "grad_norm": 1.553328275680542, "learning_rate": 2.7390477016696766e-06, "loss": 0.7446, "step": 18682 }, { "epoch": 0.6599172586748142, "grad_norm": 1.7200850248336792, "learning_rate": 2.7385375286133703e-06, "loss": 0.7944, "step": 18683 }, { "epoch": 0.6599525804785221, "grad_norm": 1.8394452333450317, "learning_rate": 2.738027385154658e-06, "loss": 0.777, "step": 18684 }, { "epoch": 0.65998790228223, "grad_norm": 1.7114357948303223, "learning_rate": 2.7375172713002187e-06, "loss": 0.7808, "step": 18685 }, { "epoch": 0.6600232240859379, "grad_norm": 1.631569743156433, "learning_rate": 2.7370071870567293e-06, "loss": 0.7471, "step": 18686 }, { "epoch": 0.6600585458896459, "grad_norm": 1.5719163417816162, "learning_rate": 2.736497132430862e-06, "loss": 0.7706, "step": 18687 }, { "epoch": 0.6600938676933538, "grad_norm": 1.582445740699768, "learning_rate": 2.7359871074292954e-06, "loss": 0.7798, "step": 18688 }, { "epoch": 0.6601291894970617, "grad_norm": 1.8072277307510376, "learning_rate": 2.7354771120587065e-06, "loss": 0.8231, "step": 18689 }, { "epoch": 0.6601645113007696, "grad_norm": 1.5612077713012695, "learning_rate": 2.7349671463257643e-06, "loss": 0.8016, "step": 18690 }, { "epoch": 0.6601998331044775, "grad_norm": 1.8399301767349243, "learning_rate": 2.734457210237147e-06, "loss": 0.7745, "step": 18691 }, { "epoch": 0.6602351549081854, "grad_norm": 1.6884312629699707, "learning_rate": 2.73394730379953e-06, "loss": 0.7811, "step": 18692 }, { "epoch": 0.6602704767118933, "grad_norm": 1.7954727411270142, "learning_rate": 2.733437427019583e-06, "loss": 0.7615, "step": 18693 }, { "epoch": 0.6603057985156012, "grad_norm": 1.014407753944397, "learning_rate": 2.73292757990398e-06, "loss": 0.572, "step": 18694 }, { "epoch": 0.6603411203193091, "grad_norm": 1.814851999282837, "learning_rate": 2.7324177624593952e-06, "loss": 0.7823, "step": 18695 }, { "epoch": 0.660376442123017, "grad_norm": 1.6775555610656738, "learning_rate": 2.731907974692502e-06, "loss": 0.7699, "step": 18696 }, { "epoch": 0.660411763926725, "grad_norm": 1.7571519613265991, "learning_rate": 2.7313982166099697e-06, "loss": 0.7541, "step": 18697 }, { "epoch": 0.6604470857304329, "grad_norm": 1.5848106145858765, "learning_rate": 2.7308884882184707e-06, "loss": 0.7468, "step": 18698 }, { "epoch": 0.6604824075341408, "grad_norm": 1.599289894104004, "learning_rate": 2.7303787895246803e-06, "loss": 0.7614, "step": 18699 }, { "epoch": 0.6605177293378487, "grad_norm": 1.699072241783142, "learning_rate": 2.7298691205352614e-06, "loss": 0.732, "step": 18700 }, { "epoch": 0.6605530511415565, "grad_norm": 2.060468912124634, "learning_rate": 2.729359481256889e-06, "loss": 0.7424, "step": 18701 }, { "epoch": 0.6605883729452644, "grad_norm": 1.8159942626953125, "learning_rate": 2.7288498716962354e-06, "loss": 0.7848, "step": 18702 }, { "epoch": 0.6606236947489723, "grad_norm": 1.6815344095230103, "learning_rate": 2.7283402918599654e-06, "loss": 0.7642, "step": 18703 }, { "epoch": 0.6606590165526802, "grad_norm": 1.7974647283554077, "learning_rate": 2.72783074175475e-06, "loss": 0.786, "step": 18704 }, { "epoch": 0.6606943383563881, "grad_norm": 1.862632155418396, "learning_rate": 2.72732122138726e-06, "loss": 0.8058, "step": 18705 }, { "epoch": 0.660729660160096, "grad_norm": 1.7930341958999634, "learning_rate": 2.7268117307641638e-06, "loss": 0.783, "step": 18706 }, { "epoch": 0.660764981963804, "grad_norm": 2.052647113800049, "learning_rate": 2.7263022698921266e-06, "loss": 0.7658, "step": 18707 }, { "epoch": 0.6608003037675119, "grad_norm": 1.7268034219741821, "learning_rate": 2.7257928387778175e-06, "loss": 0.7501, "step": 18708 }, { "epoch": 0.6608356255712198, "grad_norm": 1.9482378959655762, "learning_rate": 2.725283437427907e-06, "loss": 0.8008, "step": 18709 }, { "epoch": 0.6608709473749277, "grad_norm": 2.049520492553711, "learning_rate": 2.724774065849056e-06, "loss": 0.7683, "step": 18710 }, { "epoch": 0.6609062691786356, "grad_norm": 2.678258180618286, "learning_rate": 2.724264724047936e-06, "loss": 0.7654, "step": 18711 }, { "epoch": 0.6609415909823435, "grad_norm": 1.7028849124908447, "learning_rate": 2.7237554120312124e-06, "loss": 0.7502, "step": 18712 }, { "epoch": 0.6609769127860514, "grad_norm": 1.724251389503479, "learning_rate": 2.7232461298055478e-06, "loss": 0.7628, "step": 18713 }, { "epoch": 0.6610122345897593, "grad_norm": 1.645333170890808, "learning_rate": 2.7227368773776107e-06, "loss": 0.7392, "step": 18714 }, { "epoch": 0.6610475563934672, "grad_norm": 1.616965651512146, "learning_rate": 2.722227654754067e-06, "loss": 0.7741, "step": 18715 }, { "epoch": 0.6610828781971751, "grad_norm": 1.718144178390503, "learning_rate": 2.7217184619415776e-06, "loss": 0.7561, "step": 18716 }, { "epoch": 0.6611182000008831, "grad_norm": 1.6256170272827148, "learning_rate": 2.7212092989468087e-06, "loss": 0.7694, "step": 18717 }, { "epoch": 0.661153521804591, "grad_norm": 1.9706194400787354, "learning_rate": 2.7207001657764265e-06, "loss": 0.7896, "step": 18718 }, { "epoch": 0.6611888436082989, "grad_norm": 1.6509608030319214, "learning_rate": 2.7201910624370912e-06, "loss": 0.7815, "step": 18719 }, { "epoch": 0.6612241654120068, "grad_norm": 2.702821969985962, "learning_rate": 2.719681988935465e-06, "loss": 0.7572, "step": 18720 }, { "epoch": 0.6612594872157147, "grad_norm": 1.76081120967865, "learning_rate": 2.719172945278213e-06, "loss": 0.7577, "step": 18721 }, { "epoch": 0.6612948090194226, "grad_norm": 1.7083743810653687, "learning_rate": 2.7186639314719977e-06, "loss": 0.7932, "step": 18722 }, { "epoch": 0.6613301308231305, "grad_norm": 1.6995769739151, "learning_rate": 2.7181549475234785e-06, "loss": 0.7514, "step": 18723 }, { "epoch": 0.6613654526268384, "grad_norm": 1.9833356142044067, "learning_rate": 2.7176459934393186e-06, "loss": 0.7822, "step": 18724 }, { "epoch": 0.6614007744305463, "grad_norm": 1.7092949151992798, "learning_rate": 2.717137069226181e-06, "loss": 0.7591, "step": 18725 }, { "epoch": 0.6614360962342543, "grad_norm": 1.7427000999450684, "learning_rate": 2.716628174890722e-06, "loss": 0.8067, "step": 18726 }, { "epoch": 0.6614714180379621, "grad_norm": 1.7145040035247803, "learning_rate": 2.7161193104396043e-06, "loss": 0.7639, "step": 18727 }, { "epoch": 0.66150673984167, "grad_norm": 1.6981534957885742, "learning_rate": 2.7156104758794894e-06, "loss": 0.74, "step": 18728 }, { "epoch": 0.6615420616453779, "grad_norm": 1.7713918685913086, "learning_rate": 2.715101671217033e-06, "loss": 0.8137, "step": 18729 }, { "epoch": 0.6615773834490858, "grad_norm": 1.7572208642959595, "learning_rate": 2.7145928964588973e-06, "loss": 0.7659, "step": 18730 }, { "epoch": 0.6616127052527937, "grad_norm": 1.8824127912521362, "learning_rate": 2.714084151611742e-06, "loss": 0.8095, "step": 18731 }, { "epoch": 0.6616480270565016, "grad_norm": 1.551236867904663, "learning_rate": 2.713575436682221e-06, "loss": 0.752, "step": 18732 }, { "epoch": 0.6616833488602095, "grad_norm": 1.6826852560043335, "learning_rate": 2.7130667516769954e-06, "loss": 0.7867, "step": 18733 }, { "epoch": 0.6617186706639174, "grad_norm": 1.5649826526641846, "learning_rate": 2.712558096602724e-06, "loss": 0.7975, "step": 18734 }, { "epoch": 0.6617539924676253, "grad_norm": 1.71602201461792, "learning_rate": 2.7120494714660604e-06, "loss": 0.7948, "step": 18735 }, { "epoch": 0.6617893142713333, "grad_norm": 2.3129003047943115, "learning_rate": 2.711540876273665e-06, "loss": 0.7818, "step": 18736 }, { "epoch": 0.6618246360750412, "grad_norm": 1.6906481981277466, "learning_rate": 2.7110323110321906e-06, "loss": 0.796, "step": 18737 }, { "epoch": 0.6618599578787491, "grad_norm": 2.272752523422241, "learning_rate": 2.7105237757482974e-06, "loss": 0.7773, "step": 18738 }, { "epoch": 0.661895279682457, "grad_norm": 1.6820645332336426, "learning_rate": 2.710015270428636e-06, "loss": 0.7473, "step": 18739 }, { "epoch": 0.6619306014861649, "grad_norm": 2.0264270305633545, "learning_rate": 2.709506795079865e-06, "loss": 0.7635, "step": 18740 }, { "epoch": 0.6619659232898728, "grad_norm": 1.6851189136505127, "learning_rate": 2.7089983497086397e-06, "loss": 0.7673, "step": 18741 }, { "epoch": 0.6620012450935807, "grad_norm": 1.6268482208251953, "learning_rate": 2.708489934321612e-06, "loss": 0.7832, "step": 18742 }, { "epoch": 0.6620365668972886, "grad_norm": 1.0785677433013916, "learning_rate": 2.707981548925437e-06, "loss": 0.6019, "step": 18743 }, { "epoch": 0.6620718887009965, "grad_norm": 1.0194865465164185, "learning_rate": 2.70747319352677e-06, "loss": 0.5776, "step": 18744 }, { "epoch": 0.6621072105047044, "grad_norm": 1.7698570489883423, "learning_rate": 2.7069648681322614e-06, "loss": 0.797, "step": 18745 }, { "epoch": 0.6621425323084124, "grad_norm": 1.6381548643112183, "learning_rate": 2.706456572748566e-06, "loss": 0.7392, "step": 18746 }, { "epoch": 0.6621778541121203, "grad_norm": 1.8552745580673218, "learning_rate": 2.7059483073823373e-06, "loss": 0.7661, "step": 18747 }, { "epoch": 0.6622131759158282, "grad_norm": 1.786111831665039, "learning_rate": 2.705440072040224e-06, "loss": 0.791, "step": 18748 }, { "epoch": 0.6622484977195361, "grad_norm": 1.68861722946167, "learning_rate": 2.7049318667288802e-06, "loss": 0.7938, "step": 18749 }, { "epoch": 0.662283819523244, "grad_norm": 1.63291335105896, "learning_rate": 2.7044236914549583e-06, "loss": 0.7757, "step": 18750 }, { "epoch": 0.6623191413269519, "grad_norm": 1.888275384902954, "learning_rate": 2.7039155462251053e-06, "loss": 0.7873, "step": 18751 }, { "epoch": 0.6623544631306598, "grad_norm": 1.7540814876556396, "learning_rate": 2.7034074310459746e-06, "loss": 0.7879, "step": 18752 }, { "epoch": 0.6623897849343676, "grad_norm": 1.6959079504013062, "learning_rate": 2.7028993459242155e-06, "loss": 0.7779, "step": 18753 }, { "epoch": 0.6624251067380755, "grad_norm": 1.6170541048049927, "learning_rate": 2.7023912908664797e-06, "loss": 0.7654, "step": 18754 }, { "epoch": 0.6624604285417834, "grad_norm": 1.788069248199463, "learning_rate": 2.7018832658794147e-06, "loss": 0.807, "step": 18755 }, { "epoch": 0.6624957503454914, "grad_norm": 1.6759538650512695, "learning_rate": 2.7013752709696684e-06, "loss": 0.7603, "step": 18756 }, { "epoch": 0.6625310721491993, "grad_norm": 1.615092158317566, "learning_rate": 2.700867306143892e-06, "loss": 0.7925, "step": 18757 }, { "epoch": 0.6625663939529072, "grad_norm": 1.6757997274398804, "learning_rate": 2.7003593714087295e-06, "loss": 0.7682, "step": 18758 }, { "epoch": 0.6626017157566151, "grad_norm": 1.822603702545166, "learning_rate": 2.699851466770832e-06, "loss": 0.7975, "step": 18759 }, { "epoch": 0.662637037560323, "grad_norm": 1.7278672456741333, "learning_rate": 2.6993435922368477e-06, "loss": 0.784, "step": 18760 }, { "epoch": 0.6626723593640309, "grad_norm": 0.904668390750885, "learning_rate": 2.6988357478134202e-06, "loss": 0.5744, "step": 18761 }, { "epoch": 0.6627076811677388, "grad_norm": 1.5674024820327759, "learning_rate": 2.698327933507198e-06, "loss": 0.7984, "step": 18762 }, { "epoch": 0.6627430029714467, "grad_norm": 1.5527976751327515, "learning_rate": 2.697820149324827e-06, "loss": 0.7991, "step": 18763 }, { "epoch": 0.6627783247751546, "grad_norm": 1.804901123046875, "learning_rate": 2.6973123952729553e-06, "loss": 0.7515, "step": 18764 }, { "epoch": 0.6628136465788625, "grad_norm": 1.7047553062438965, "learning_rate": 2.6968046713582236e-06, "loss": 0.7657, "step": 18765 }, { "epoch": 0.6628489683825705, "grad_norm": 1.5371620655059814, "learning_rate": 2.6962969775872796e-06, "loss": 0.7379, "step": 18766 }, { "epoch": 0.6628842901862784, "grad_norm": 1.5603525638580322, "learning_rate": 2.69578931396677e-06, "loss": 0.7857, "step": 18767 }, { "epoch": 0.6629196119899863, "grad_norm": 1.7346748113632202, "learning_rate": 2.6952816805033344e-06, "loss": 0.8241, "step": 18768 }, { "epoch": 0.6629549337936942, "grad_norm": 1.6971116065979004, "learning_rate": 2.694774077203619e-06, "loss": 0.8012, "step": 18769 }, { "epoch": 0.6629902555974021, "grad_norm": 1.5360618829727173, "learning_rate": 2.6942665040742694e-06, "loss": 0.776, "step": 18770 }, { "epoch": 0.66302557740111, "grad_norm": 1.7785263061523438, "learning_rate": 2.693758961121924e-06, "loss": 0.8139, "step": 18771 }, { "epoch": 0.6630608992048179, "grad_norm": 1.6348079442977905, "learning_rate": 2.6932514483532277e-06, "loss": 0.7835, "step": 18772 }, { "epoch": 0.6630962210085258, "grad_norm": 1.5790733098983765, "learning_rate": 2.6927439657748257e-06, "loss": 0.7638, "step": 18773 }, { "epoch": 0.6631315428122337, "grad_norm": 1.6408873796463013, "learning_rate": 2.6922365133933558e-06, "loss": 0.7849, "step": 18774 }, { "epoch": 0.6631668646159417, "grad_norm": 1.574616551399231, "learning_rate": 2.691729091215459e-06, "loss": 0.7759, "step": 18775 }, { "epoch": 0.6632021864196496, "grad_norm": 1.6653283834457397, "learning_rate": 2.6912216992477785e-06, "loss": 0.7576, "step": 18776 }, { "epoch": 0.6632375082233575, "grad_norm": 1.708125352859497, "learning_rate": 2.6907143374969557e-06, "loss": 0.762, "step": 18777 }, { "epoch": 0.6632728300270654, "grad_norm": 1.668817162513733, "learning_rate": 2.6902070059696282e-06, "loss": 0.7428, "step": 18778 }, { "epoch": 0.6633081518307732, "grad_norm": 1.7331626415252686, "learning_rate": 2.689699704672437e-06, "loss": 0.7978, "step": 18779 }, { "epoch": 0.6633434736344811, "grad_norm": 1.8208588361740112, "learning_rate": 2.6891924336120232e-06, "loss": 0.7882, "step": 18780 }, { "epoch": 0.663378795438189, "grad_norm": 2.616955280303955, "learning_rate": 2.688685192795023e-06, "loss": 0.7885, "step": 18781 }, { "epoch": 0.6634141172418969, "grad_norm": 1.724249005317688, "learning_rate": 2.6881779822280762e-06, "loss": 0.8064, "step": 18782 }, { "epoch": 0.6634494390456048, "grad_norm": 1.7956663370132446, "learning_rate": 2.6876708019178237e-06, "loss": 0.7533, "step": 18783 }, { "epoch": 0.6634847608493127, "grad_norm": 1.7685860395431519, "learning_rate": 2.6871636518708988e-06, "loss": 0.8227, "step": 18784 }, { "epoch": 0.6635200826530206, "grad_norm": 1.5840452909469604, "learning_rate": 2.686656532093942e-06, "loss": 0.8023, "step": 18785 }, { "epoch": 0.6635554044567286, "grad_norm": 1.8255939483642578, "learning_rate": 2.686149442593591e-06, "loss": 0.7917, "step": 18786 }, { "epoch": 0.6635907262604365, "grad_norm": 1.7361106872558594, "learning_rate": 2.685642383376479e-06, "loss": 0.7557, "step": 18787 }, { "epoch": 0.6636260480641444, "grad_norm": 1.6007139682769775, "learning_rate": 2.6851353544492443e-06, "loss": 0.7925, "step": 18788 }, { "epoch": 0.6636613698678523, "grad_norm": 1.7656522989273071, "learning_rate": 2.6846283558185254e-06, "loss": 0.8359, "step": 18789 }, { "epoch": 0.6636966916715602, "grad_norm": 1.6740411520004272, "learning_rate": 2.6841213874909535e-06, "loss": 0.7836, "step": 18790 }, { "epoch": 0.6637320134752681, "grad_norm": 1.819143295288086, "learning_rate": 2.683614449473165e-06, "loss": 0.7759, "step": 18791 }, { "epoch": 0.663767335278976, "grad_norm": 1.9200642108917236, "learning_rate": 2.6831075417717977e-06, "loss": 0.797, "step": 18792 }, { "epoch": 0.6638026570826839, "grad_norm": 1.6210920810699463, "learning_rate": 2.6826006643934826e-06, "loss": 0.7445, "step": 18793 }, { "epoch": 0.6638379788863918, "grad_norm": 1.6667097806930542, "learning_rate": 2.682093817344853e-06, "loss": 0.7627, "step": 18794 }, { "epoch": 0.6638733006900998, "grad_norm": 1.533834457397461, "learning_rate": 2.681587000632544e-06, "loss": 0.7602, "step": 18795 }, { "epoch": 0.6639086224938077, "grad_norm": 1.9487783908843994, "learning_rate": 2.6810802142631897e-06, "loss": 0.7966, "step": 18796 }, { "epoch": 0.6639439442975156, "grad_norm": 1.7692807912826538, "learning_rate": 2.6805734582434206e-06, "loss": 0.7916, "step": 18797 }, { "epoch": 0.6639792661012235, "grad_norm": 1.8702784776687622, "learning_rate": 2.680066732579869e-06, "loss": 0.7931, "step": 18798 }, { "epoch": 0.6640145879049314, "grad_norm": 1.8508365154266357, "learning_rate": 2.679560037279171e-06, "loss": 0.772, "step": 18799 }, { "epoch": 0.6640499097086393, "grad_norm": 1.879550576210022, "learning_rate": 2.6790533723479524e-06, "loss": 0.7696, "step": 18800 }, { "epoch": 0.6640852315123472, "grad_norm": 1.6297982931137085, "learning_rate": 2.678546737792847e-06, "loss": 0.7652, "step": 18801 }, { "epoch": 0.6641205533160551, "grad_norm": 1.795000433921814, "learning_rate": 2.6780401336204874e-06, "loss": 0.8203, "step": 18802 }, { "epoch": 0.664155875119763, "grad_norm": 1.8148895502090454, "learning_rate": 2.677533559837501e-06, "loss": 0.7794, "step": 18803 }, { "epoch": 0.664191196923471, "grad_norm": 1.8267920017242432, "learning_rate": 2.6770270164505175e-06, "loss": 0.8201, "step": 18804 }, { "epoch": 0.6642265187271787, "grad_norm": 1.6013495922088623, "learning_rate": 2.67652050346617e-06, "loss": 0.8072, "step": 18805 }, { "epoch": 0.6642618405308867, "grad_norm": 1.6689331531524658, "learning_rate": 2.676014020891084e-06, "loss": 0.7679, "step": 18806 }, { "epoch": 0.6642971623345946, "grad_norm": 1.6865055561065674, "learning_rate": 2.6755075687318894e-06, "loss": 0.7678, "step": 18807 }, { "epoch": 0.6643324841383025, "grad_norm": 1.9903275966644287, "learning_rate": 2.6750011469952162e-06, "loss": 0.7745, "step": 18808 }, { "epoch": 0.6643678059420104, "grad_norm": 1.5769155025482178, "learning_rate": 2.67449475568769e-06, "loss": 0.7682, "step": 18809 }, { "epoch": 0.6644031277457183, "grad_norm": 1.8160748481750488, "learning_rate": 2.6739883948159407e-06, "loss": 0.77, "step": 18810 }, { "epoch": 0.6644384495494262, "grad_norm": 1.9307458400726318, "learning_rate": 2.6734820643865922e-06, "loss": 0.7716, "step": 18811 }, { "epoch": 0.6644737713531341, "grad_norm": 1.7930508852005005, "learning_rate": 2.672975764406275e-06, "loss": 0.7545, "step": 18812 }, { "epoch": 0.664509093156842, "grad_norm": 1.9273148775100708, "learning_rate": 2.672469494881611e-06, "loss": 0.7972, "step": 18813 }, { "epoch": 0.66454441496055, "grad_norm": 1.6295459270477295, "learning_rate": 2.6719632558192293e-06, "loss": 0.7551, "step": 18814 }, { "epoch": 0.6645797367642579, "grad_norm": 1.6043492555618286, "learning_rate": 2.6714570472257575e-06, "loss": 0.7757, "step": 18815 }, { "epoch": 0.6646150585679658, "grad_norm": 1.8709357976913452, "learning_rate": 2.6709508691078145e-06, "loss": 0.7518, "step": 18816 }, { "epoch": 0.6646503803716737, "grad_norm": 1.6476739645004272, "learning_rate": 2.67044472147203e-06, "loss": 0.7849, "step": 18817 }, { "epoch": 0.6646857021753816, "grad_norm": 1.6068872213363647, "learning_rate": 2.6699386043250285e-06, "loss": 0.7861, "step": 18818 }, { "epoch": 0.6647210239790895, "grad_norm": 1.6640839576721191, "learning_rate": 2.6694325176734313e-06, "loss": 0.7802, "step": 18819 }, { "epoch": 0.6647563457827974, "grad_norm": 1.738764762878418, "learning_rate": 2.668926461523863e-06, "loss": 0.797, "step": 18820 }, { "epoch": 0.6647916675865053, "grad_norm": 1.9137674570083618, "learning_rate": 2.668420435882946e-06, "loss": 0.7627, "step": 18821 }, { "epoch": 0.6648269893902132, "grad_norm": 1.7269387245178223, "learning_rate": 2.667914440757307e-06, "loss": 0.7742, "step": 18822 }, { "epoch": 0.6648623111939211, "grad_norm": 1.797776222229004, "learning_rate": 2.6674084761535636e-06, "loss": 0.7906, "step": 18823 }, { "epoch": 0.664897632997629, "grad_norm": 1.5725066661834717, "learning_rate": 2.666902542078339e-06, "loss": 0.761, "step": 18824 }, { "epoch": 0.664932954801337, "grad_norm": 1.6540204286575317, "learning_rate": 2.666396638538257e-06, "loss": 0.7786, "step": 18825 }, { "epoch": 0.6649682766050449, "grad_norm": 1.80870521068573, "learning_rate": 2.6658907655399357e-06, "loss": 0.7706, "step": 18826 }, { "epoch": 0.6650035984087528, "grad_norm": 0.9066553115844727, "learning_rate": 2.6653849230899976e-06, "loss": 0.5532, "step": 18827 }, { "epoch": 0.6650389202124607, "grad_norm": 1.7277568578720093, "learning_rate": 2.664879111195066e-06, "loss": 0.779, "step": 18828 }, { "epoch": 0.6650742420161686, "grad_norm": 1.8868077993392944, "learning_rate": 2.6643733298617548e-06, "loss": 0.7936, "step": 18829 }, { "epoch": 0.6651095638198765, "grad_norm": 1.5112639665603638, "learning_rate": 2.663867579096685e-06, "loss": 0.761, "step": 18830 }, { "epoch": 0.6651448856235843, "grad_norm": 1.9439605474472046, "learning_rate": 2.663361858906478e-06, "loss": 0.7713, "step": 18831 }, { "epoch": 0.6651802074272922, "grad_norm": 1.5574058294296265, "learning_rate": 2.6628561692977538e-06, "loss": 0.7486, "step": 18832 }, { "epoch": 0.6652155292310001, "grad_norm": 1.7768957614898682, "learning_rate": 2.6623505102771263e-06, "loss": 0.7932, "step": 18833 }, { "epoch": 0.665250851034708, "grad_norm": 1.6749159097671509, "learning_rate": 2.661844881851216e-06, "loss": 0.7288, "step": 18834 }, { "epoch": 0.665286172838416, "grad_norm": 1.4946010112762451, "learning_rate": 2.6613392840266426e-06, "loss": 0.737, "step": 18835 }, { "epoch": 0.6653214946421239, "grad_norm": 1.9912203550338745, "learning_rate": 2.6608337168100184e-06, "loss": 0.7625, "step": 18836 }, { "epoch": 0.6653568164458318, "grad_norm": 1.5764739513397217, "learning_rate": 2.660328180207964e-06, "loss": 0.7738, "step": 18837 }, { "epoch": 0.6653921382495397, "grad_norm": 1.611104130744934, "learning_rate": 2.6598226742270957e-06, "loss": 0.7753, "step": 18838 }, { "epoch": 0.6654274600532476, "grad_norm": 1.5840739011764526, "learning_rate": 2.6593171988740262e-06, "loss": 0.7687, "step": 18839 }, { "epoch": 0.6654627818569555, "grad_norm": 1.6272889375686646, "learning_rate": 2.6588117541553737e-06, "loss": 0.7789, "step": 18840 }, { "epoch": 0.6654981036606634, "grad_norm": 1.726091980934143, "learning_rate": 2.6583063400777544e-06, "loss": 0.7865, "step": 18841 }, { "epoch": 0.6655334254643713, "grad_norm": 1.6041369438171387, "learning_rate": 2.65780095664778e-06, "loss": 0.7741, "step": 18842 }, { "epoch": 0.6655687472680792, "grad_norm": 1.7782914638519287, "learning_rate": 2.657295603872066e-06, "loss": 0.7777, "step": 18843 }, { "epoch": 0.6656040690717872, "grad_norm": 1.5868752002716064, "learning_rate": 2.6567902817572287e-06, "loss": 0.7589, "step": 18844 }, { "epoch": 0.6656393908754951, "grad_norm": 1.6192995309829712, "learning_rate": 2.656284990309878e-06, "loss": 0.7893, "step": 18845 }, { "epoch": 0.665674712679203, "grad_norm": 1.7793596982955933, "learning_rate": 2.655779729536629e-06, "loss": 0.7827, "step": 18846 }, { "epoch": 0.6657100344829109, "grad_norm": 1.640407681465149, "learning_rate": 2.655274499444096e-06, "loss": 0.761, "step": 18847 }, { "epoch": 0.6657453562866188, "grad_norm": 1.5898007154464722, "learning_rate": 2.65476930003889e-06, "loss": 0.7177, "step": 18848 }, { "epoch": 0.6657806780903267, "grad_norm": 1.580492377281189, "learning_rate": 2.65426413132762e-06, "loss": 0.7753, "step": 18849 }, { "epoch": 0.6658159998940346, "grad_norm": 1.9704012870788574, "learning_rate": 2.6537589933168993e-06, "loss": 0.7834, "step": 18850 }, { "epoch": 0.6658513216977425, "grad_norm": 1.8317500352859497, "learning_rate": 2.6532538860133428e-06, "loss": 0.7536, "step": 18851 }, { "epoch": 0.6658866435014504, "grad_norm": 1.6467993259429932, "learning_rate": 2.6527488094235567e-06, "loss": 0.7565, "step": 18852 }, { "epoch": 0.6659219653051583, "grad_norm": 1.6767233610153198, "learning_rate": 2.652243763554152e-06, "loss": 0.7529, "step": 18853 }, { "epoch": 0.6659572871088663, "grad_norm": 1.4577194452285767, "learning_rate": 2.651738748411742e-06, "loss": 0.7595, "step": 18854 }, { "epoch": 0.6659926089125742, "grad_norm": 2.1400773525238037, "learning_rate": 2.6512337640029322e-06, "loss": 0.7825, "step": 18855 }, { "epoch": 0.6660279307162821, "grad_norm": 1.4903993606567383, "learning_rate": 2.6507288103343332e-06, "loss": 0.7679, "step": 18856 }, { "epoch": 0.6660632525199899, "grad_norm": 5.485467910766602, "learning_rate": 2.650223887412556e-06, "loss": 0.7621, "step": 18857 }, { "epoch": 0.6660985743236978, "grad_norm": 1.6100859642028809, "learning_rate": 2.649718995244205e-06, "loss": 0.7621, "step": 18858 }, { "epoch": 0.6661338961274057, "grad_norm": 0.9993711709976196, "learning_rate": 2.649214133835889e-06, "loss": 0.5732, "step": 18859 }, { "epoch": 0.6661692179311136, "grad_norm": 1.6442070007324219, "learning_rate": 2.648709303194219e-06, "loss": 0.7677, "step": 18860 }, { "epoch": 0.6662045397348215, "grad_norm": 1.5240023136138916, "learning_rate": 2.648204503325798e-06, "loss": 0.7801, "step": 18861 }, { "epoch": 0.6662398615385294, "grad_norm": 1.6348267793655396, "learning_rate": 2.6476997342372346e-06, "loss": 0.7823, "step": 18862 }, { "epoch": 0.6662751833422373, "grad_norm": 1.7085551023483276, "learning_rate": 2.6471949959351362e-06, "loss": 0.7721, "step": 18863 }, { "epoch": 0.6663105051459453, "grad_norm": 1.7189418077468872, "learning_rate": 2.6466902884261058e-06, "loss": 0.7538, "step": 18864 }, { "epoch": 0.6663458269496532, "grad_norm": 1.8650696277618408, "learning_rate": 2.6461856117167523e-06, "loss": 0.7856, "step": 18865 }, { "epoch": 0.6663811487533611, "grad_norm": 1.8784124851226807, "learning_rate": 2.6456809658136774e-06, "loss": 0.7707, "step": 18866 }, { "epoch": 0.666416470557069, "grad_norm": 1.750258207321167, "learning_rate": 2.645176350723489e-06, "loss": 0.7789, "step": 18867 }, { "epoch": 0.6664517923607769, "grad_norm": 1.9488009214401245, "learning_rate": 2.6446717664527875e-06, "loss": 0.8016, "step": 18868 }, { "epoch": 0.6664871141644848, "grad_norm": 1.7724878787994385, "learning_rate": 2.6441672130081802e-06, "loss": 0.7975, "step": 18869 }, { "epoch": 0.6665224359681927, "grad_norm": 1.6674525737762451, "learning_rate": 2.6436626903962714e-06, "loss": 0.7865, "step": 18870 }, { "epoch": 0.6665577577719006, "grad_norm": 1.609823226928711, "learning_rate": 2.64315819862366e-06, "loss": 0.7818, "step": 18871 }, { "epoch": 0.6665930795756085, "grad_norm": 1.620739221572876, "learning_rate": 2.642653737696951e-06, "loss": 0.7768, "step": 18872 }, { "epoch": 0.6666284013793164, "grad_norm": 3.713805913925171, "learning_rate": 2.642149307622749e-06, "loss": 0.8144, "step": 18873 }, { "epoch": 0.6666637231830244, "grad_norm": 1.532086730003357, "learning_rate": 2.6416449084076512e-06, "loss": 0.756, "step": 18874 }, { "epoch": 0.6666990449867323, "grad_norm": 1.8011032342910767, "learning_rate": 2.6411405400582624e-06, "loss": 0.7575, "step": 18875 }, { "epoch": 0.6667343667904402, "grad_norm": 1.6437689065933228, "learning_rate": 2.6406362025811836e-06, "loss": 0.7602, "step": 18876 }, { "epoch": 0.6667696885941481, "grad_norm": 1.8119484186172485, "learning_rate": 2.6401318959830136e-06, "loss": 0.7511, "step": 18877 }, { "epoch": 0.666805010397856, "grad_norm": 1.8795950412750244, "learning_rate": 2.6396276202703538e-06, "loss": 0.7891, "step": 18878 }, { "epoch": 0.6668403322015639, "grad_norm": 2.025372266769409, "learning_rate": 2.6391233754498038e-06, "loss": 0.7533, "step": 18879 }, { "epoch": 0.6668756540052718, "grad_norm": 1.715811014175415, "learning_rate": 2.6386191615279645e-06, "loss": 0.7792, "step": 18880 }, { "epoch": 0.6669109758089797, "grad_norm": 1.8000377416610718, "learning_rate": 2.6381149785114323e-06, "loss": 0.7793, "step": 18881 }, { "epoch": 0.6669462976126876, "grad_norm": 1.5685287714004517, "learning_rate": 2.637610826406808e-06, "loss": 0.7922, "step": 18882 }, { "epoch": 0.6669816194163954, "grad_norm": 1.56610906124115, "learning_rate": 2.63710670522069e-06, "loss": 0.7687, "step": 18883 }, { "epoch": 0.6670169412201034, "grad_norm": 1.6122452020645142, "learning_rate": 2.636602614959676e-06, "loss": 0.7607, "step": 18884 }, { "epoch": 0.6670522630238113, "grad_norm": 0.8678116798400879, "learning_rate": 2.63609855563036e-06, "loss": 0.5588, "step": 18885 }, { "epoch": 0.6670875848275192, "grad_norm": 1.5896580219268799, "learning_rate": 2.635594527239345e-06, "loss": 0.7725, "step": 18886 }, { "epoch": 0.6671229066312271, "grad_norm": 1.845172643661499, "learning_rate": 2.6350905297932203e-06, "loss": 0.7801, "step": 18887 }, { "epoch": 0.667158228434935, "grad_norm": 1.6069633960723877, "learning_rate": 2.634586563298588e-06, "loss": 0.761, "step": 18888 }, { "epoch": 0.6671935502386429, "grad_norm": 1.7799594402313232, "learning_rate": 2.6340826277620413e-06, "loss": 0.8092, "step": 18889 }, { "epoch": 0.6672288720423508, "grad_norm": 1.628872036933899, "learning_rate": 2.633578723190179e-06, "loss": 0.7664, "step": 18890 }, { "epoch": 0.6672641938460587, "grad_norm": 1.5927106142044067, "learning_rate": 2.633074849589591e-06, "loss": 0.7495, "step": 18891 }, { "epoch": 0.6672995156497666, "grad_norm": 1.688236951828003, "learning_rate": 2.632571006966874e-06, "loss": 0.7922, "step": 18892 }, { "epoch": 0.6673348374534745, "grad_norm": 1.6151617765426636, "learning_rate": 2.6320671953286258e-06, "loss": 0.7355, "step": 18893 }, { "epoch": 0.6673701592571825, "grad_norm": 1.8206981420516968, "learning_rate": 2.6315634146814346e-06, "loss": 0.8122, "step": 18894 }, { "epoch": 0.6674054810608904, "grad_norm": 1.9246537685394287, "learning_rate": 2.631059665031896e-06, "loss": 0.7909, "step": 18895 }, { "epoch": 0.6674408028645983, "grad_norm": 1.6257370710372925, "learning_rate": 2.630555946386605e-06, "loss": 0.7549, "step": 18896 }, { "epoch": 0.6674761246683062, "grad_norm": 2.02850341796875, "learning_rate": 2.6300522587521503e-06, "loss": 0.7835, "step": 18897 }, { "epoch": 0.6675114464720141, "grad_norm": 1.6157236099243164, "learning_rate": 2.629548602135127e-06, "loss": 0.781, "step": 18898 }, { "epoch": 0.667546768275722, "grad_norm": 1.645762324333191, "learning_rate": 2.6290449765421276e-06, "loss": 0.7806, "step": 18899 }, { "epoch": 0.6675820900794299, "grad_norm": 1.7620816230773926, "learning_rate": 2.6285413819797397e-06, "loss": 0.7781, "step": 18900 }, { "epoch": 0.6676174118831378, "grad_norm": 1.763246774673462, "learning_rate": 2.6280378184545563e-06, "loss": 0.7792, "step": 18901 }, { "epoch": 0.6676527336868457, "grad_norm": 1.7061258554458618, "learning_rate": 2.62753428597317e-06, "loss": 0.7782, "step": 18902 }, { "epoch": 0.6676880554905537, "grad_norm": 2.078519344329834, "learning_rate": 2.6270307845421696e-06, "loss": 0.7672, "step": 18903 }, { "epoch": 0.6677233772942616, "grad_norm": 1.779654622077942, "learning_rate": 2.6265273141681415e-06, "loss": 0.7711, "step": 18904 }, { "epoch": 0.6677586990979695, "grad_norm": 1.6840088367462158, "learning_rate": 2.6260238748576784e-06, "loss": 0.7638, "step": 18905 }, { "epoch": 0.6677940209016774, "grad_norm": 1.8304698467254639, "learning_rate": 2.62552046661737e-06, "loss": 0.7655, "step": 18906 }, { "epoch": 0.6678293427053853, "grad_norm": 2.7145748138427734, "learning_rate": 2.625017089453802e-06, "loss": 0.7814, "step": 18907 }, { "epoch": 0.6678646645090932, "grad_norm": 1.7183451652526855, "learning_rate": 2.6245137433735636e-06, "loss": 0.7626, "step": 18908 }, { "epoch": 0.667899986312801, "grad_norm": 1.5811200141906738, "learning_rate": 2.624010428383244e-06, "loss": 0.7663, "step": 18909 }, { "epoch": 0.6679353081165089, "grad_norm": 1.7350525856018066, "learning_rate": 2.623507144489428e-06, "loss": 0.7898, "step": 18910 }, { "epoch": 0.6679706299202168, "grad_norm": 1.6588413715362549, "learning_rate": 2.623003891698704e-06, "loss": 0.7826, "step": 18911 }, { "epoch": 0.6680059517239247, "grad_norm": 1.6292814016342163, "learning_rate": 2.6225006700176598e-06, "loss": 0.7694, "step": 18912 }, { "epoch": 0.6680412735276327, "grad_norm": 1.6346772909164429, "learning_rate": 2.621997479452878e-06, "loss": 0.737, "step": 18913 }, { "epoch": 0.6680765953313406, "grad_norm": 1.8458101749420166, "learning_rate": 2.621494320010946e-06, "loss": 0.7904, "step": 18914 }, { "epoch": 0.6681119171350485, "grad_norm": 1.6281862258911133, "learning_rate": 2.6209911916984514e-06, "loss": 0.7943, "step": 18915 }, { "epoch": 0.6681472389387564, "grad_norm": 1.7350215911865234, "learning_rate": 2.620488094521975e-06, "loss": 0.7789, "step": 18916 }, { "epoch": 0.6681825607424643, "grad_norm": 1.8746967315673828, "learning_rate": 2.6199850284881034e-06, "loss": 0.7404, "step": 18917 }, { "epoch": 0.6682178825461722, "grad_norm": 1.754263997077942, "learning_rate": 2.6194819936034223e-06, "loss": 0.778, "step": 18918 }, { "epoch": 0.6682532043498801, "grad_norm": 1.7863463163375854, "learning_rate": 2.6189789898745104e-06, "loss": 0.7636, "step": 18919 }, { "epoch": 0.668288526153588, "grad_norm": 1.7374845743179321, "learning_rate": 2.618476017307955e-06, "loss": 0.8147, "step": 18920 }, { "epoch": 0.6683238479572959, "grad_norm": 1.6267551183700562, "learning_rate": 2.6179730759103394e-06, "loss": 0.7766, "step": 18921 }, { "epoch": 0.6683591697610038, "grad_norm": 1.5334844589233398, "learning_rate": 2.617470165688244e-06, "loss": 0.7633, "step": 18922 }, { "epoch": 0.6683944915647118, "grad_norm": 1.5760821104049683, "learning_rate": 2.61696728664825e-06, "loss": 0.74, "step": 18923 }, { "epoch": 0.6684298133684197, "grad_norm": 1.5890169143676758, "learning_rate": 2.6164644387969397e-06, "loss": 0.7608, "step": 18924 }, { "epoch": 0.6684651351721276, "grad_norm": 1.6993643045425415, "learning_rate": 2.6159616221408967e-06, "loss": 0.8, "step": 18925 }, { "epoch": 0.6685004569758355, "grad_norm": 1.732742190361023, "learning_rate": 2.6154588366866985e-06, "loss": 0.7366, "step": 18926 }, { "epoch": 0.6685357787795434, "grad_norm": 1.8318147659301758, "learning_rate": 2.614956082440926e-06, "loss": 0.8001, "step": 18927 }, { "epoch": 0.6685711005832513, "grad_norm": 1.66086745262146, "learning_rate": 2.6144533594101618e-06, "loss": 0.7585, "step": 18928 }, { "epoch": 0.6686064223869592, "grad_norm": 1.6955384016036987, "learning_rate": 2.6139506676009823e-06, "loss": 0.7988, "step": 18929 }, { "epoch": 0.6686417441906671, "grad_norm": 1.7543220520019531, "learning_rate": 2.6134480070199675e-06, "loss": 0.8076, "step": 18930 }, { "epoch": 0.668677065994375, "grad_norm": 1.6817065477371216, "learning_rate": 2.612945377673699e-06, "loss": 0.788, "step": 18931 }, { "epoch": 0.668712387798083, "grad_norm": 1.947274923324585, "learning_rate": 2.61244277956875e-06, "loss": 0.8147, "step": 18932 }, { "epoch": 0.6687477096017909, "grad_norm": 1.8999717235565186, "learning_rate": 2.611940212711702e-06, "loss": 0.765, "step": 18933 }, { "epoch": 0.6687830314054988, "grad_norm": 1.5516420602798462, "learning_rate": 2.6114376771091333e-06, "loss": 0.7424, "step": 18934 }, { "epoch": 0.6688183532092066, "grad_norm": 2.0552420616149902, "learning_rate": 2.610935172767617e-06, "loss": 0.7744, "step": 18935 }, { "epoch": 0.6688536750129145, "grad_norm": 1.8534754514694214, "learning_rate": 2.6104326996937324e-06, "loss": 0.779, "step": 18936 }, { "epoch": 0.6688889968166224, "grad_norm": 1.4633677005767822, "learning_rate": 2.609930257894055e-06, "loss": 0.7823, "step": 18937 }, { "epoch": 0.6689243186203303, "grad_norm": 1.6763137578964233, "learning_rate": 2.6094278473751635e-06, "loss": 0.7631, "step": 18938 }, { "epoch": 0.6689596404240382, "grad_norm": 2.405214786529541, "learning_rate": 2.6089254681436303e-06, "loss": 0.8105, "step": 18939 }, { "epoch": 0.6689949622277461, "grad_norm": 2.339953899383545, "learning_rate": 2.6084231202060296e-06, "loss": 0.7972, "step": 18940 }, { "epoch": 0.669030284031454, "grad_norm": 1.8961130380630493, "learning_rate": 2.6079208035689396e-06, "loss": 0.7831, "step": 18941 }, { "epoch": 0.669065605835162, "grad_norm": 1.8340585231781006, "learning_rate": 2.60741851823893e-06, "loss": 0.7916, "step": 18942 }, { "epoch": 0.6691009276388699, "grad_norm": 1.7727104425430298, "learning_rate": 2.6069162642225775e-06, "loss": 0.7529, "step": 18943 }, { "epoch": 0.6691362494425778, "grad_norm": 1.595119833946228, "learning_rate": 2.606414041526457e-06, "loss": 0.757, "step": 18944 }, { "epoch": 0.6691715712462857, "grad_norm": 1.70108962059021, "learning_rate": 2.605911850157138e-06, "loss": 0.7428, "step": 18945 }, { "epoch": 0.6692068930499936, "grad_norm": 1.7448432445526123, "learning_rate": 2.6054096901211938e-06, "loss": 0.7704, "step": 18946 }, { "epoch": 0.6692422148537015, "grad_norm": 1.7328287363052368, "learning_rate": 2.6049075614251983e-06, "loss": 0.7521, "step": 18947 }, { "epoch": 0.6692775366574094, "grad_norm": 1.5257362127304077, "learning_rate": 2.604405464075724e-06, "loss": 0.7866, "step": 18948 }, { "epoch": 0.6693128584611173, "grad_norm": 1.7534445524215698, "learning_rate": 2.603903398079338e-06, "loss": 0.773, "step": 18949 }, { "epoch": 0.6693481802648252, "grad_norm": 1.7664082050323486, "learning_rate": 2.603401363442615e-06, "loss": 0.7482, "step": 18950 }, { "epoch": 0.6693835020685331, "grad_norm": 1.719882607460022, "learning_rate": 2.6028993601721263e-06, "loss": 0.773, "step": 18951 }, { "epoch": 0.669418823872241, "grad_norm": 1.8269367218017578, "learning_rate": 2.602397388274438e-06, "loss": 0.7723, "step": 18952 }, { "epoch": 0.669454145675949, "grad_norm": 1.6687530279159546, "learning_rate": 2.6018954477561215e-06, "loss": 0.7732, "step": 18953 }, { "epoch": 0.6694894674796569, "grad_norm": 1.7349178791046143, "learning_rate": 2.601393538623749e-06, "loss": 0.7825, "step": 18954 }, { "epoch": 0.6695247892833648, "grad_norm": 1.9222980737686157, "learning_rate": 2.6008916608838854e-06, "loss": 0.7762, "step": 18955 }, { "epoch": 0.6695601110870727, "grad_norm": 1.5724341869354248, "learning_rate": 2.6003898145430995e-06, "loss": 0.7873, "step": 18956 }, { "epoch": 0.6695954328907806, "grad_norm": 1.6364903450012207, "learning_rate": 2.599887999607964e-06, "loss": 0.7413, "step": 18957 }, { "epoch": 0.6696307546944885, "grad_norm": 1.9188143014907837, "learning_rate": 2.5993862160850426e-06, "loss": 0.7973, "step": 18958 }, { "epoch": 0.6696660764981964, "grad_norm": 1.739600419998169, "learning_rate": 2.598884463980901e-06, "loss": 0.8123, "step": 18959 }, { "epoch": 0.6697013983019043, "grad_norm": 1.6655973196029663, "learning_rate": 2.598382743302108e-06, "loss": 0.7878, "step": 18960 }, { "epoch": 0.6697367201056121, "grad_norm": 1.7703417539596558, "learning_rate": 2.5978810540552323e-06, "loss": 0.7485, "step": 18961 }, { "epoch": 0.66977204190932, "grad_norm": 1.940933108329773, "learning_rate": 2.5973793962468365e-06, "loss": 0.7707, "step": 18962 }, { "epoch": 0.669807363713028, "grad_norm": 1.855529546737671, "learning_rate": 2.596877769883487e-06, "loss": 0.7897, "step": 18963 }, { "epoch": 0.6698426855167359, "grad_norm": 1.7402939796447754, "learning_rate": 2.5963761749717518e-06, "loss": 0.7828, "step": 18964 }, { "epoch": 0.6698780073204438, "grad_norm": 1.6764042377471924, "learning_rate": 2.595874611518191e-06, "loss": 0.755, "step": 18965 }, { "epoch": 0.6699133291241517, "grad_norm": 1.8951760530471802, "learning_rate": 2.595373079529372e-06, "loss": 0.8007, "step": 18966 }, { "epoch": 0.6699486509278596, "grad_norm": 1.7051565647125244, "learning_rate": 2.5948715790118596e-06, "loss": 0.7676, "step": 18967 }, { "epoch": 0.6699839727315675, "grad_norm": 2.0731353759765625, "learning_rate": 2.5943701099722134e-06, "loss": 0.7905, "step": 18968 }, { "epoch": 0.6700192945352754, "grad_norm": 1.620779037475586, "learning_rate": 2.593868672416999e-06, "loss": 0.7569, "step": 18969 }, { "epoch": 0.6700546163389833, "grad_norm": 1.6837189197540283, "learning_rate": 2.593367266352782e-06, "loss": 0.7871, "step": 18970 }, { "epoch": 0.6700899381426912, "grad_norm": 1.5648835897445679, "learning_rate": 2.5928658917861192e-06, "loss": 0.774, "step": 18971 }, { "epoch": 0.6701252599463992, "grad_norm": 1.9163522720336914, "learning_rate": 2.592364548723576e-06, "loss": 0.8299, "step": 18972 }, { "epoch": 0.6701605817501071, "grad_norm": 1.6152299642562866, "learning_rate": 2.591863237171714e-06, "loss": 0.7681, "step": 18973 }, { "epoch": 0.670195903553815, "grad_norm": 1.7170684337615967, "learning_rate": 2.5913619571370917e-06, "loss": 0.7806, "step": 18974 }, { "epoch": 0.6702312253575229, "grad_norm": 2.6816160678863525, "learning_rate": 2.590860708626271e-06, "loss": 0.778, "step": 18975 }, { "epoch": 0.6702665471612308, "grad_norm": 1.5453931093215942, "learning_rate": 2.590359491645815e-06, "loss": 0.7575, "step": 18976 }, { "epoch": 0.6703018689649387, "grad_norm": 1.5224792957305908, "learning_rate": 2.589858306202281e-06, "loss": 0.7509, "step": 18977 }, { "epoch": 0.6703371907686466, "grad_norm": 1.742501139640808, "learning_rate": 2.5893571523022253e-06, "loss": 0.7912, "step": 18978 }, { "epoch": 0.6703725125723545, "grad_norm": 1.7257516384124756, "learning_rate": 2.5888560299522115e-06, "loss": 0.7466, "step": 18979 }, { "epoch": 0.6704078343760624, "grad_norm": 1.6510359048843384, "learning_rate": 2.588354939158798e-06, "loss": 0.7795, "step": 18980 }, { "epoch": 0.6704431561797703, "grad_norm": 1.7788997888565063, "learning_rate": 2.5878538799285403e-06, "loss": 0.7992, "step": 18981 }, { "epoch": 0.6704784779834783, "grad_norm": 1.70827054977417, "learning_rate": 2.587352852267997e-06, "loss": 0.7353, "step": 18982 }, { "epoch": 0.6705137997871862, "grad_norm": 1.7855035066604614, "learning_rate": 2.5868518561837274e-06, "loss": 0.7925, "step": 18983 }, { "epoch": 0.6705491215908941, "grad_norm": 1.8567336797714233, "learning_rate": 2.5863508916822865e-06, "loss": 0.7998, "step": 18984 }, { "epoch": 0.670584443394602, "grad_norm": 1.7587718963623047, "learning_rate": 2.58584995877023e-06, "loss": 0.7866, "step": 18985 }, { "epoch": 0.6706197651983099, "grad_norm": 1.6588215827941895, "learning_rate": 2.5853490574541185e-06, "loss": 0.7899, "step": 18986 }, { "epoch": 0.6706550870020178, "grad_norm": 1.6936413049697876, "learning_rate": 2.584848187740503e-06, "loss": 0.757, "step": 18987 }, { "epoch": 0.6706904088057256, "grad_norm": 1.5336240530014038, "learning_rate": 2.5843473496359393e-06, "loss": 0.7585, "step": 18988 }, { "epoch": 0.6707257306094335, "grad_norm": 1.6518056392669678, "learning_rate": 2.583846543146986e-06, "loss": 0.7607, "step": 18989 }, { "epoch": 0.6707610524131414, "grad_norm": 1.522089958190918, "learning_rate": 2.5833457682801925e-06, "loss": 0.742, "step": 18990 }, { "epoch": 0.6707963742168493, "grad_norm": 1.8210110664367676, "learning_rate": 2.5828450250421156e-06, "loss": 0.802, "step": 18991 }, { "epoch": 0.6708316960205573, "grad_norm": 1.7752784490585327, "learning_rate": 2.582344313439311e-06, "loss": 0.8002, "step": 18992 }, { "epoch": 0.6708670178242652, "grad_norm": 1.9312264919281006, "learning_rate": 2.581843633478327e-06, "loss": 0.754, "step": 18993 }, { "epoch": 0.6709023396279731, "grad_norm": 1.6508498191833496, "learning_rate": 2.581342985165719e-06, "loss": 0.7724, "step": 18994 }, { "epoch": 0.670937661431681, "grad_norm": 1.698032259941101, "learning_rate": 2.5808423685080418e-06, "loss": 0.8104, "step": 18995 }, { "epoch": 0.6709729832353889, "grad_norm": 1.5799297094345093, "learning_rate": 2.5803417835118444e-06, "loss": 0.7516, "step": 18996 }, { "epoch": 0.6710083050390968, "grad_norm": 1.711419701576233, "learning_rate": 2.579841230183677e-06, "loss": 0.8064, "step": 18997 }, { "epoch": 0.6710436268428047, "grad_norm": 1.5990710258483887, "learning_rate": 2.579340708530093e-06, "loss": 0.755, "step": 18998 }, { "epoch": 0.6710789486465126, "grad_norm": 1.72321355342865, "learning_rate": 2.5788402185576443e-06, "loss": 0.7652, "step": 18999 }, { "epoch": 0.6711142704502205, "grad_norm": 1.7024630308151245, "learning_rate": 2.578339760272878e-06, "loss": 0.7754, "step": 19000 }, { "epoch": 0.6711495922539285, "grad_norm": 1.7307744026184082, "learning_rate": 2.577839333682346e-06, "loss": 0.8013, "step": 19001 }, { "epoch": 0.6711849140576364, "grad_norm": 2.28043270111084, "learning_rate": 2.577338938792599e-06, "loss": 0.7949, "step": 19002 }, { "epoch": 0.6712202358613443, "grad_norm": 1.587998390197754, "learning_rate": 2.5768385756101825e-06, "loss": 0.7845, "step": 19003 }, { "epoch": 0.6712555576650522, "grad_norm": 0.9423118233680725, "learning_rate": 2.5763382441416472e-06, "loss": 0.5739, "step": 19004 }, { "epoch": 0.6712908794687601, "grad_norm": 1.7826634645462036, "learning_rate": 2.575837944393542e-06, "loss": 0.7714, "step": 19005 }, { "epoch": 0.671326201272468, "grad_norm": 1.7081979513168335, "learning_rate": 2.575337676372416e-06, "loss": 0.7899, "step": 19006 }, { "epoch": 0.6713615230761759, "grad_norm": 1.6461524963378906, "learning_rate": 2.574837440084812e-06, "loss": 0.7345, "step": 19007 }, { "epoch": 0.6713968448798838, "grad_norm": 1.705896019935608, "learning_rate": 2.57433723553728e-06, "loss": 0.7599, "step": 19008 }, { "epoch": 0.6714321666835917, "grad_norm": 1.744918704032898, "learning_rate": 2.5738370627363683e-06, "loss": 0.7961, "step": 19009 }, { "epoch": 0.6714674884872996, "grad_norm": 1.7201730012893677, "learning_rate": 2.573336921688619e-06, "loss": 0.7642, "step": 19010 }, { "epoch": 0.6715028102910076, "grad_norm": 1.8498066663742065, "learning_rate": 2.57283681240058e-06, "loss": 0.748, "step": 19011 }, { "epoch": 0.6715381320947155, "grad_norm": 1.6974587440490723, "learning_rate": 2.5723367348787988e-06, "loss": 0.7752, "step": 19012 }, { "epoch": 0.6715734538984234, "grad_norm": 1.9881314039230347, "learning_rate": 2.5718366891298176e-06, "loss": 0.816, "step": 19013 }, { "epoch": 0.6716087757021312, "grad_norm": 1.6148680448532104, "learning_rate": 2.57133667516018e-06, "loss": 0.7625, "step": 19014 }, { "epoch": 0.6716440975058391, "grad_norm": 1.8374415636062622, "learning_rate": 2.5708366929764305e-06, "loss": 0.8166, "step": 19015 }, { "epoch": 0.671679419309547, "grad_norm": 1.706797480583191, "learning_rate": 2.5703367425851167e-06, "loss": 0.7699, "step": 19016 }, { "epoch": 0.6717147411132549, "grad_norm": 1.8317724466323853, "learning_rate": 2.569836823992776e-06, "loss": 0.754, "step": 19017 }, { "epoch": 0.6717500629169628, "grad_norm": 1.702110767364502, "learning_rate": 2.5693369372059545e-06, "loss": 0.7427, "step": 19018 }, { "epoch": 0.6717853847206707, "grad_norm": 1.7756192684173584, "learning_rate": 2.5688370822311963e-06, "loss": 0.8205, "step": 19019 }, { "epoch": 0.6718207065243786, "grad_norm": 1.7475390434265137, "learning_rate": 2.56833725907504e-06, "loss": 0.7924, "step": 19020 }, { "epoch": 0.6718560283280866, "grad_norm": 1.7455312013626099, "learning_rate": 2.5678374677440277e-06, "loss": 0.7883, "step": 19021 }, { "epoch": 0.6718913501317945, "grad_norm": 2.4977428913116455, "learning_rate": 2.5673377082447035e-06, "loss": 0.7573, "step": 19022 }, { "epoch": 0.6719266719355024, "grad_norm": 1.732239007949829, "learning_rate": 2.5668379805836043e-06, "loss": 0.8073, "step": 19023 }, { "epoch": 0.6719619937392103, "grad_norm": 1.6101717948913574, "learning_rate": 2.5663382847672724e-06, "loss": 0.7603, "step": 19024 }, { "epoch": 0.6719973155429182, "grad_norm": 1.6513738632202148, "learning_rate": 2.565838620802249e-06, "loss": 0.7609, "step": 19025 }, { "epoch": 0.6720326373466261, "grad_norm": 1.660163402557373, "learning_rate": 2.565338988695071e-06, "loss": 0.7665, "step": 19026 }, { "epoch": 0.672067959150334, "grad_norm": 1.7151464223861694, "learning_rate": 2.5648393884522776e-06, "loss": 0.769, "step": 19027 }, { "epoch": 0.6721032809540419, "grad_norm": 1.7903239727020264, "learning_rate": 2.5643398200804107e-06, "loss": 0.798, "step": 19028 }, { "epoch": 0.6721386027577498, "grad_norm": 1.5336710214614868, "learning_rate": 2.5638402835860054e-06, "loss": 0.7613, "step": 19029 }, { "epoch": 0.6721739245614577, "grad_norm": 1.8014124631881714, "learning_rate": 2.5633407789755994e-06, "loss": 0.7703, "step": 19030 }, { "epoch": 0.6722092463651657, "grad_norm": 1.6729446649551392, "learning_rate": 2.562841306255734e-06, "loss": 0.7741, "step": 19031 }, { "epoch": 0.6722445681688736, "grad_norm": 1.8214054107666016, "learning_rate": 2.562341865432943e-06, "loss": 0.8213, "step": 19032 }, { "epoch": 0.6722798899725815, "grad_norm": 1.6037652492523193, "learning_rate": 2.561842456513761e-06, "loss": 0.7648, "step": 19033 }, { "epoch": 0.6723152117762894, "grad_norm": 1.7811636924743652, "learning_rate": 2.561343079504728e-06, "loss": 0.784, "step": 19034 }, { "epoch": 0.6723505335799973, "grad_norm": 1.806389570236206, "learning_rate": 2.5608437344123794e-06, "loss": 0.8057, "step": 19035 }, { "epoch": 0.6723858553837052, "grad_norm": 1.582994818687439, "learning_rate": 2.560344421243247e-06, "loss": 0.7442, "step": 19036 }, { "epoch": 0.6724211771874131, "grad_norm": 1.665361762046814, "learning_rate": 2.559845140003869e-06, "loss": 0.7398, "step": 19037 }, { "epoch": 0.672456498991121, "grad_norm": 5.461582183837891, "learning_rate": 2.5593458907007813e-06, "loss": 0.7714, "step": 19038 }, { "epoch": 0.6724918207948289, "grad_norm": 1.9056928157806396, "learning_rate": 2.5588466733405138e-06, "loss": 0.8336, "step": 19039 }, { "epoch": 0.6725271425985367, "grad_norm": 1.8304640054702759, "learning_rate": 2.558347487929601e-06, "loss": 0.8219, "step": 19040 }, { "epoch": 0.6725624644022447, "grad_norm": 1.6437312364578247, "learning_rate": 2.557848334474581e-06, "loss": 0.7398, "step": 19041 }, { "epoch": 0.6725977862059526, "grad_norm": 1.837005615234375, "learning_rate": 2.55734921298198e-06, "loss": 0.7819, "step": 19042 }, { "epoch": 0.6726331080096605, "grad_norm": 1.9736058712005615, "learning_rate": 2.556850123458333e-06, "loss": 0.7711, "step": 19043 }, { "epoch": 0.6726684298133684, "grad_norm": 1.6011873483657837, "learning_rate": 2.5563510659101742e-06, "loss": 0.7311, "step": 19044 }, { "epoch": 0.6727037516170763, "grad_norm": 1.5560922622680664, "learning_rate": 2.5558520403440322e-06, "loss": 0.7711, "step": 19045 }, { "epoch": 0.6727390734207842, "grad_norm": 1.7246668338775635, "learning_rate": 2.555353046766439e-06, "loss": 0.8071, "step": 19046 }, { "epoch": 0.6727743952244921, "grad_norm": 1.715928554534912, "learning_rate": 2.554854085183927e-06, "loss": 0.7788, "step": 19047 }, { "epoch": 0.6728097170282, "grad_norm": 1.556158185005188, "learning_rate": 2.554355155603023e-06, "loss": 0.7839, "step": 19048 }, { "epoch": 0.6728450388319079, "grad_norm": 1.7143253087997437, "learning_rate": 2.55385625803026e-06, "loss": 0.7849, "step": 19049 }, { "epoch": 0.6728803606356158, "grad_norm": 1.7296048402786255, "learning_rate": 2.553357392472168e-06, "loss": 0.7985, "step": 19050 }, { "epoch": 0.6729156824393238, "grad_norm": 1.623579740524292, "learning_rate": 2.5528585589352743e-06, "loss": 0.7875, "step": 19051 }, { "epoch": 0.6729510042430317, "grad_norm": 1.790149450302124, "learning_rate": 2.552359757426106e-06, "loss": 0.8045, "step": 19052 }, { "epoch": 0.6729863260467396, "grad_norm": 1.6347583532333374, "learning_rate": 2.5518609879511936e-06, "loss": 0.7764, "step": 19053 }, { "epoch": 0.6730216478504475, "grad_norm": 1.7472034692764282, "learning_rate": 2.5513622505170652e-06, "loss": 0.7612, "step": 19054 }, { "epoch": 0.6730569696541554, "grad_norm": 1.782718539237976, "learning_rate": 2.5508635451302468e-06, "loss": 0.791, "step": 19055 }, { "epoch": 0.6730922914578633, "grad_norm": 1.6830519437789917, "learning_rate": 2.5503648717972646e-06, "loss": 0.7832, "step": 19056 }, { "epoch": 0.6731276132615712, "grad_norm": 1.6536517143249512, "learning_rate": 2.5498662305246493e-06, "loss": 0.7819, "step": 19057 }, { "epoch": 0.6731629350652791, "grad_norm": 1.8097965717315674, "learning_rate": 2.5493676213189217e-06, "loss": 0.7804, "step": 19058 }, { "epoch": 0.673198256868987, "grad_norm": 1.9424543380737305, "learning_rate": 2.5488690441866105e-06, "loss": 0.7463, "step": 19059 }, { "epoch": 0.673233578672695, "grad_norm": 1.674813985824585, "learning_rate": 2.548370499134242e-06, "loss": 0.7477, "step": 19060 }, { "epoch": 0.6732689004764029, "grad_norm": 1.6042999029159546, "learning_rate": 2.5478719861683376e-06, "loss": 0.7697, "step": 19061 }, { "epoch": 0.6733042222801108, "grad_norm": 1.837789535522461, "learning_rate": 2.5473735052954242e-06, "loss": 0.8063, "step": 19062 }, { "epoch": 0.6733395440838187, "grad_norm": 1.7617168426513672, "learning_rate": 2.5468750565220246e-06, "loss": 0.7822, "step": 19063 }, { "epoch": 0.6733748658875266, "grad_norm": 1.6719975471496582, "learning_rate": 2.546376639854665e-06, "loss": 0.7646, "step": 19064 }, { "epoch": 0.6734101876912345, "grad_norm": 1.695882797241211, "learning_rate": 2.545878255299866e-06, "loss": 0.8085, "step": 19065 }, { "epoch": 0.6734455094949423, "grad_norm": 1.5341626405715942, "learning_rate": 2.54537990286415e-06, "loss": 0.7384, "step": 19066 }, { "epoch": 0.6734808312986502, "grad_norm": 1.7571823596954346, "learning_rate": 2.544881582554043e-06, "loss": 0.8, "step": 19067 }, { "epoch": 0.6735161531023581, "grad_norm": 1.6544915437698364, "learning_rate": 2.5443832943760618e-06, "loss": 0.7922, "step": 19068 }, { "epoch": 0.673551474906066, "grad_norm": 1.6852318048477173, "learning_rate": 2.5438850383367325e-06, "loss": 0.7828, "step": 19069 }, { "epoch": 0.673586796709774, "grad_norm": 2.076669454574585, "learning_rate": 2.5433868144425746e-06, "loss": 0.7639, "step": 19070 }, { "epoch": 0.6736221185134819, "grad_norm": 1.5281236171722412, "learning_rate": 2.542888622700106e-06, "loss": 0.7365, "step": 19071 }, { "epoch": 0.6736574403171898, "grad_norm": 1.756350040435791, "learning_rate": 2.542390463115849e-06, "loss": 0.7726, "step": 19072 }, { "epoch": 0.6736927621208977, "grad_norm": 1.6095693111419678, "learning_rate": 2.5418923356963233e-06, "loss": 0.8025, "step": 19073 }, { "epoch": 0.6737280839246056, "grad_norm": 2.271421194076538, "learning_rate": 2.541394240448052e-06, "loss": 0.8039, "step": 19074 }, { "epoch": 0.6737634057283135, "grad_norm": 1.5868662595748901, "learning_rate": 2.5408961773775477e-06, "loss": 0.7773, "step": 19075 }, { "epoch": 0.6737987275320214, "grad_norm": 1.5073925256729126, "learning_rate": 2.5403981464913317e-06, "loss": 0.7496, "step": 19076 }, { "epoch": 0.6738340493357293, "grad_norm": 1.643341302871704, "learning_rate": 2.539900147795925e-06, "loss": 0.7641, "step": 19077 }, { "epoch": 0.6738693711394372, "grad_norm": 1.9045590162277222, "learning_rate": 2.5394021812978405e-06, "loss": 0.7611, "step": 19078 }, { "epoch": 0.6739046929431451, "grad_norm": 1.6302000284194946, "learning_rate": 2.5389042470035976e-06, "loss": 0.7685, "step": 19079 }, { "epoch": 0.6739400147468531, "grad_norm": 1.7634737491607666, "learning_rate": 2.5384063449197154e-06, "loss": 0.7599, "step": 19080 }, { "epoch": 0.673975336550561, "grad_norm": 1.5065711736679077, "learning_rate": 2.537908475052706e-06, "loss": 0.7554, "step": 19081 }, { "epoch": 0.6740106583542689, "grad_norm": 2.566577672958374, "learning_rate": 2.5374106374090885e-06, "loss": 0.7998, "step": 19082 }, { "epoch": 0.6740459801579768, "grad_norm": 2.715822696685791, "learning_rate": 2.5369128319953788e-06, "loss": 0.7526, "step": 19083 }, { "epoch": 0.6740813019616847, "grad_norm": 1.6337311267852783, "learning_rate": 2.536415058818089e-06, "loss": 0.7689, "step": 19084 }, { "epoch": 0.6741166237653926, "grad_norm": 1.5218843221664429, "learning_rate": 2.5359173178837363e-06, "loss": 0.7514, "step": 19085 }, { "epoch": 0.6741519455691005, "grad_norm": 1.7850428819656372, "learning_rate": 2.5354196091988365e-06, "loss": 0.7706, "step": 19086 }, { "epoch": 0.6741872673728084, "grad_norm": 1.8441094160079956, "learning_rate": 2.5349219327699017e-06, "loss": 0.7915, "step": 19087 }, { "epoch": 0.6742225891765163, "grad_norm": 1.595037817955017, "learning_rate": 2.534424288603442e-06, "loss": 0.7406, "step": 19088 }, { "epoch": 0.6742579109802243, "grad_norm": 1.6043497323989868, "learning_rate": 2.533926676705975e-06, "loss": 0.7102, "step": 19089 }, { "epoch": 0.6742932327839322, "grad_norm": 2.005575656890869, "learning_rate": 2.5334290970840135e-06, "loss": 0.7958, "step": 19090 }, { "epoch": 0.6743285545876401, "grad_norm": 1.7236655950546265, "learning_rate": 2.5329315497440667e-06, "loss": 0.7641, "step": 19091 }, { "epoch": 0.6743638763913479, "grad_norm": 1.5833094120025635, "learning_rate": 2.5324340346926477e-06, "loss": 0.7498, "step": 19092 }, { "epoch": 0.6743991981950558, "grad_norm": 1.5936191082000732, "learning_rate": 2.5319365519362704e-06, "loss": 0.8095, "step": 19093 }, { "epoch": 0.6744345199987637, "grad_norm": 1.6576588153839111, "learning_rate": 2.531439101481442e-06, "loss": 0.7624, "step": 19094 }, { "epoch": 0.6744698418024716, "grad_norm": 1.5524555444717407, "learning_rate": 2.530941683334674e-06, "loss": 0.7462, "step": 19095 }, { "epoch": 0.6745051636061795, "grad_norm": 1.8369807004928589, "learning_rate": 2.530444297502479e-06, "loss": 0.7956, "step": 19096 }, { "epoch": 0.6745404854098874, "grad_norm": 1.5921911001205444, "learning_rate": 2.5299469439913634e-06, "loss": 0.7748, "step": 19097 }, { "epoch": 0.6745758072135953, "grad_norm": 1.5462932586669922, "learning_rate": 2.5294496228078374e-06, "loss": 0.7437, "step": 19098 }, { "epoch": 0.6746111290173032, "grad_norm": 1.6377381086349487, "learning_rate": 2.5289523339584123e-06, "loss": 0.7534, "step": 19099 }, { "epoch": 0.6746464508210112, "grad_norm": 0.9514378309249878, "learning_rate": 2.5284550774495932e-06, "loss": 0.5826, "step": 19100 }, { "epoch": 0.6746817726247191, "grad_norm": 1.7218735218048096, "learning_rate": 2.5279578532878884e-06, "loss": 0.796, "step": 19101 }, { "epoch": 0.674717094428427, "grad_norm": 1.7637922763824463, "learning_rate": 2.5274606614798093e-06, "loss": 0.7704, "step": 19102 }, { "epoch": 0.6747524162321349, "grad_norm": 1.7527867555618286, "learning_rate": 2.5269635020318582e-06, "loss": 0.7657, "step": 19103 }, { "epoch": 0.6747877380358428, "grad_norm": 1.6100140810012817, "learning_rate": 2.5264663749505434e-06, "loss": 0.7475, "step": 19104 }, { "epoch": 0.6748230598395507, "grad_norm": 1.8265018463134766, "learning_rate": 2.5259692802423742e-06, "loss": 0.8176, "step": 19105 }, { "epoch": 0.6748583816432586, "grad_norm": 1.8219635486602783, "learning_rate": 2.5254722179138534e-06, "loss": 0.7928, "step": 19106 }, { "epoch": 0.6748937034469665, "grad_norm": 1.7268571853637695, "learning_rate": 2.5249751879714857e-06, "loss": 0.7913, "step": 19107 }, { "epoch": 0.6749290252506744, "grad_norm": 1.8047233819961548, "learning_rate": 2.5244781904217775e-06, "loss": 0.7803, "step": 19108 }, { "epoch": 0.6749643470543824, "grad_norm": 1.5881986618041992, "learning_rate": 2.5239812252712346e-06, "loss": 0.8012, "step": 19109 }, { "epoch": 0.6749996688580903, "grad_norm": 1.6411211490631104, "learning_rate": 2.5234842925263592e-06, "loss": 0.7742, "step": 19110 }, { "epoch": 0.6750349906617982, "grad_norm": 1.662366271018982, "learning_rate": 2.5229873921936553e-06, "loss": 0.7775, "step": 19111 }, { "epoch": 0.6750703124655061, "grad_norm": 1.5620030164718628, "learning_rate": 2.5224905242796284e-06, "loss": 0.7443, "step": 19112 }, { "epoch": 0.675105634269214, "grad_norm": 1.6539051532745361, "learning_rate": 2.5219936887907782e-06, "loss": 0.7758, "step": 19113 }, { "epoch": 0.6751409560729219, "grad_norm": 1.5539064407348633, "learning_rate": 2.521496885733609e-06, "loss": 0.7544, "step": 19114 }, { "epoch": 0.6751762778766298, "grad_norm": 1.6271291971206665, "learning_rate": 2.521000115114624e-06, "loss": 0.771, "step": 19115 }, { "epoch": 0.6752115996803377, "grad_norm": 1.6728465557098389, "learning_rate": 2.520503376940322e-06, "loss": 0.755, "step": 19116 }, { "epoch": 0.6752469214840456, "grad_norm": 5.837282657623291, "learning_rate": 2.5200066712172057e-06, "loss": 0.7961, "step": 19117 }, { "epoch": 0.6752822432877534, "grad_norm": 1.7665404081344604, "learning_rate": 2.519509997951778e-06, "loss": 0.7476, "step": 19118 }, { "epoch": 0.6753175650914613, "grad_norm": 1.9008784294128418, "learning_rate": 2.5190133571505347e-06, "loss": 0.8019, "step": 19119 }, { "epoch": 0.6753528868951693, "grad_norm": 1.685602068901062, "learning_rate": 2.518516748819978e-06, "loss": 0.8018, "step": 19120 }, { "epoch": 0.6753882086988772, "grad_norm": 1.633940577507019, "learning_rate": 2.5180201729666084e-06, "loss": 0.7925, "step": 19121 }, { "epoch": 0.6754235305025851, "grad_norm": 1.6351667642593384, "learning_rate": 2.517523629596926e-06, "loss": 0.7844, "step": 19122 }, { "epoch": 0.675458852306293, "grad_norm": 1.6669402122497559, "learning_rate": 2.5170271187174256e-06, "loss": 0.7562, "step": 19123 }, { "epoch": 0.6754941741100009, "grad_norm": 1.721089243888855, "learning_rate": 2.516530640334609e-06, "loss": 0.7987, "step": 19124 }, { "epoch": 0.6755294959137088, "grad_norm": 1.5662697553634644, "learning_rate": 2.5160341944549726e-06, "loss": 0.741, "step": 19125 }, { "epoch": 0.6755648177174167, "grad_norm": 1.708683967590332, "learning_rate": 2.5155377810850125e-06, "loss": 0.8313, "step": 19126 }, { "epoch": 0.6756001395211246, "grad_norm": 1.6709609031677246, "learning_rate": 2.5150414002312263e-06, "loss": 0.8047, "step": 19127 }, { "epoch": 0.6756354613248325, "grad_norm": 1.5566861629486084, "learning_rate": 2.5145450519001134e-06, "loss": 0.7445, "step": 19128 }, { "epoch": 0.6756707831285405, "grad_norm": 2.1071763038635254, "learning_rate": 2.5140487360981658e-06, "loss": 0.7698, "step": 19129 }, { "epoch": 0.6757061049322484, "grad_norm": 1.6754376888275146, "learning_rate": 2.513552452831881e-06, "loss": 0.8036, "step": 19130 }, { "epoch": 0.6757414267359563, "grad_norm": 1.630307912826538, "learning_rate": 2.5130562021077543e-06, "loss": 0.761, "step": 19131 }, { "epoch": 0.6757767485396642, "grad_norm": 1.5783394575119019, "learning_rate": 2.5125599839322822e-06, "loss": 0.786, "step": 19132 }, { "epoch": 0.6758120703433721, "grad_norm": 1.674573302268982, "learning_rate": 2.5120637983119557e-06, "loss": 0.7454, "step": 19133 }, { "epoch": 0.67584739214708, "grad_norm": 1.7982312440872192, "learning_rate": 2.51156764525327e-06, "loss": 0.798, "step": 19134 }, { "epoch": 0.6758827139507879, "grad_norm": 1.5552587509155273, "learning_rate": 2.5110715247627214e-06, "loss": 0.8023, "step": 19135 }, { "epoch": 0.6759180357544958, "grad_norm": 1.6755855083465576, "learning_rate": 2.5105754368467996e-06, "loss": 0.7587, "step": 19136 }, { "epoch": 0.6759533575582037, "grad_norm": 1.6189645528793335, "learning_rate": 2.510079381511998e-06, "loss": 0.7963, "step": 19137 }, { "epoch": 0.6759886793619116, "grad_norm": 1.6543700695037842, "learning_rate": 2.509583358764811e-06, "loss": 0.7584, "step": 19138 }, { "epoch": 0.6760240011656196, "grad_norm": 1.5919100046157837, "learning_rate": 2.5090873686117274e-06, "loss": 0.7849, "step": 19139 }, { "epoch": 0.6760593229693275, "grad_norm": 1.4996823072433472, "learning_rate": 2.5085914110592397e-06, "loss": 0.7755, "step": 19140 }, { "epoch": 0.6760946447730354, "grad_norm": 1.7096506357192993, "learning_rate": 2.5080954861138418e-06, "loss": 0.7475, "step": 19141 }, { "epoch": 0.6761299665767433, "grad_norm": 1.8039891719818115, "learning_rate": 2.5075995937820207e-06, "loss": 0.7843, "step": 19142 }, { "epoch": 0.6761652883804512, "grad_norm": 1.7691608667373657, "learning_rate": 2.5071037340702663e-06, "loss": 0.7872, "step": 19143 }, { "epoch": 0.676200610184159, "grad_norm": 1.7622880935668945, "learning_rate": 2.50660790698507e-06, "loss": 0.7736, "step": 19144 }, { "epoch": 0.6762359319878669, "grad_norm": 1.9519294500350952, "learning_rate": 2.5061121125329226e-06, "loss": 0.79, "step": 19145 }, { "epoch": 0.6762712537915748, "grad_norm": 1.6715872287750244, "learning_rate": 2.5056163507203084e-06, "loss": 0.768, "step": 19146 }, { "epoch": 0.6763065755952827, "grad_norm": 1.7561300992965698, "learning_rate": 2.5051206215537195e-06, "loss": 0.7647, "step": 19147 }, { "epoch": 0.6763418973989906, "grad_norm": 1.6015323400497437, "learning_rate": 2.504624925039644e-06, "loss": 0.7584, "step": 19148 }, { "epoch": 0.6763772192026986, "grad_norm": 1.6530925035476685, "learning_rate": 2.504129261184567e-06, "loss": 0.7804, "step": 19149 }, { "epoch": 0.6764125410064065, "grad_norm": 1.5014759302139282, "learning_rate": 2.5036336299949765e-06, "loss": 0.7818, "step": 19150 }, { "epoch": 0.6764478628101144, "grad_norm": 1.660914659500122, "learning_rate": 2.503138031477362e-06, "loss": 0.7786, "step": 19151 }, { "epoch": 0.6764831846138223, "grad_norm": 1.5519049167633057, "learning_rate": 2.502642465638205e-06, "loss": 0.7513, "step": 19152 }, { "epoch": 0.6765185064175302, "grad_norm": 1.471593976020813, "learning_rate": 2.502146932483995e-06, "loss": 0.759, "step": 19153 }, { "epoch": 0.6765538282212381, "grad_norm": 1.754777431488037, "learning_rate": 2.5016514320212183e-06, "loss": 0.7822, "step": 19154 }, { "epoch": 0.676589150024946, "grad_norm": 1.714140772819519, "learning_rate": 2.5011559642563556e-06, "loss": 0.7592, "step": 19155 }, { "epoch": 0.6766244718286539, "grad_norm": 1.5420364141464233, "learning_rate": 2.500660529195894e-06, "loss": 0.7481, "step": 19156 }, { "epoch": 0.6766597936323618, "grad_norm": 1.9550397396087646, "learning_rate": 2.50016512684632e-06, "loss": 0.7697, "step": 19157 }, { "epoch": 0.6766951154360697, "grad_norm": 1.6384344100952148, "learning_rate": 2.499669757214112e-06, "loss": 0.7858, "step": 19158 }, { "epoch": 0.6767304372397777, "grad_norm": 1.8326129913330078, "learning_rate": 2.4991744203057574e-06, "loss": 0.7737, "step": 19159 }, { "epoch": 0.6767657590434856, "grad_norm": 1.6334409713745117, "learning_rate": 2.498679116127739e-06, "loss": 0.7416, "step": 19160 }, { "epoch": 0.6768010808471935, "grad_norm": 1.5430480241775513, "learning_rate": 2.498183844686538e-06, "loss": 0.7595, "step": 19161 }, { "epoch": 0.6768364026509014, "grad_norm": 1.613508701324463, "learning_rate": 2.497688605988635e-06, "loss": 0.7808, "step": 19162 }, { "epoch": 0.6768717244546093, "grad_norm": 1.5278207063674927, "learning_rate": 2.497193400040513e-06, "loss": 0.7608, "step": 19163 }, { "epoch": 0.6769070462583172, "grad_norm": 1.6959171295166016, "learning_rate": 2.4966982268486557e-06, "loss": 0.7688, "step": 19164 }, { "epoch": 0.6769423680620251, "grad_norm": 1.882090449333191, "learning_rate": 2.4962030864195387e-06, "loss": 0.7901, "step": 19165 }, { "epoch": 0.676977689865733, "grad_norm": 1.5233581066131592, "learning_rate": 2.4957079787596454e-06, "loss": 0.7518, "step": 19166 }, { "epoch": 0.677013011669441, "grad_norm": 1.6893119812011719, "learning_rate": 2.4952129038754574e-06, "loss": 0.7874, "step": 19167 }, { "epoch": 0.6770483334731489, "grad_norm": 2.0836703777313232, "learning_rate": 2.4947178617734503e-06, "loss": 0.7902, "step": 19168 }, { "epoch": 0.6770836552768568, "grad_norm": 1.669683575630188, "learning_rate": 2.494222852460104e-06, "loss": 0.786, "step": 19169 }, { "epoch": 0.6771189770805646, "grad_norm": 1.6279433965682983, "learning_rate": 2.493727875941901e-06, "loss": 0.745, "step": 19170 }, { "epoch": 0.6771542988842725, "grad_norm": 1.6183992624282837, "learning_rate": 2.493232932225314e-06, "loss": 0.7429, "step": 19171 }, { "epoch": 0.6771896206879804, "grad_norm": 1.5938340425491333, "learning_rate": 2.492738021316823e-06, "loss": 0.7782, "step": 19172 }, { "epoch": 0.6772249424916883, "grad_norm": 1.6100220680236816, "learning_rate": 2.4922431432229077e-06, "loss": 0.7613, "step": 19173 }, { "epoch": 0.6772602642953962, "grad_norm": 1.6709682941436768, "learning_rate": 2.4917482979500406e-06, "loss": 0.7748, "step": 19174 }, { "epoch": 0.6772955860991041, "grad_norm": 1.5988610982894897, "learning_rate": 2.4912534855047007e-06, "loss": 0.783, "step": 19175 }, { "epoch": 0.677330907902812, "grad_norm": 1.736436367034912, "learning_rate": 2.4907587058933648e-06, "loss": 0.7839, "step": 19176 }, { "epoch": 0.6773662297065199, "grad_norm": 1.6373592615127563, "learning_rate": 2.490263959122506e-06, "loss": 0.7784, "step": 19177 }, { "epoch": 0.6774015515102279, "grad_norm": 1.4978340864181519, "learning_rate": 2.4897692451986e-06, "loss": 0.7403, "step": 19178 }, { "epoch": 0.6774368733139358, "grad_norm": 1.5355769395828247, "learning_rate": 2.489274564128125e-06, "loss": 0.7549, "step": 19179 }, { "epoch": 0.6774721951176437, "grad_norm": 2.014143228530884, "learning_rate": 2.4887799159175517e-06, "loss": 0.746, "step": 19180 }, { "epoch": 0.6775075169213516, "grad_norm": 1.617287278175354, "learning_rate": 2.4882853005733536e-06, "loss": 0.7724, "step": 19181 }, { "epoch": 0.6775428387250595, "grad_norm": 1.7609949111938477, "learning_rate": 2.487790718102005e-06, "loss": 0.7571, "step": 19182 }, { "epoch": 0.6775781605287674, "grad_norm": 1.618764042854309, "learning_rate": 2.487296168509981e-06, "loss": 0.7652, "step": 19183 }, { "epoch": 0.6776134823324753, "grad_norm": 1.9074684381484985, "learning_rate": 2.4868016518037506e-06, "loss": 0.771, "step": 19184 }, { "epoch": 0.6776488041361832, "grad_norm": 1.6648749113082886, "learning_rate": 2.486307167989788e-06, "loss": 0.7752, "step": 19185 }, { "epoch": 0.6776841259398911, "grad_norm": 1.653382420539856, "learning_rate": 2.4858127170745667e-06, "loss": 0.8082, "step": 19186 }, { "epoch": 0.677719447743599, "grad_norm": 1.8766947984695435, "learning_rate": 2.4853182990645534e-06, "loss": 0.7354, "step": 19187 }, { "epoch": 0.677754769547307, "grad_norm": 1.6515448093414307, "learning_rate": 2.484823913966222e-06, "loss": 0.757, "step": 19188 }, { "epoch": 0.6777900913510149, "grad_norm": 1.6421806812286377, "learning_rate": 2.4843295617860424e-06, "loss": 0.8096, "step": 19189 }, { "epoch": 0.6778254131547228, "grad_norm": 1.7137495279312134, "learning_rate": 2.483835242530486e-06, "loss": 0.7858, "step": 19190 }, { "epoch": 0.6778607349584307, "grad_norm": 1.6585712432861328, "learning_rate": 2.48334095620602e-06, "loss": 0.7627, "step": 19191 }, { "epoch": 0.6778960567621386, "grad_norm": 2.1709578037261963, "learning_rate": 2.482846702819114e-06, "loss": 0.814, "step": 19192 }, { "epoch": 0.6779313785658465, "grad_norm": 1.5722949504852295, "learning_rate": 2.482352482376239e-06, "loss": 0.7701, "step": 19193 }, { "epoch": 0.6779667003695544, "grad_norm": 1.6397345066070557, "learning_rate": 2.4818582948838594e-06, "loss": 0.7897, "step": 19194 }, { "epoch": 0.6780020221732623, "grad_norm": 1.6518018245697021, "learning_rate": 2.4813641403484452e-06, "loss": 0.7868, "step": 19195 }, { "epoch": 0.6780373439769701, "grad_norm": 1.674363374710083, "learning_rate": 2.4808700187764655e-06, "loss": 0.741, "step": 19196 }, { "epoch": 0.678072665780678, "grad_norm": 1.5014828443527222, "learning_rate": 2.480375930174384e-06, "loss": 0.7647, "step": 19197 }, { "epoch": 0.678107987584386, "grad_norm": 1.4830175638198853, "learning_rate": 2.47988187454867e-06, "loss": 0.743, "step": 19198 }, { "epoch": 0.6781433093880939, "grad_norm": 1.645466685295105, "learning_rate": 2.479387851905787e-06, "loss": 0.754, "step": 19199 }, { "epoch": 0.6781786311918018, "grad_norm": 1.9989629983901978, "learning_rate": 2.478893862252203e-06, "loss": 0.7875, "step": 19200 }, { "epoch": 0.6782139529955097, "grad_norm": 1.717219352722168, "learning_rate": 2.4783999055943806e-06, "loss": 0.7899, "step": 19201 }, { "epoch": 0.6782492747992176, "grad_norm": 1.8067618608474731, "learning_rate": 2.477905981938787e-06, "loss": 0.7629, "step": 19202 }, { "epoch": 0.6782845966029255, "grad_norm": 1.5825130939483643, "learning_rate": 2.477412091291887e-06, "loss": 0.7782, "step": 19203 }, { "epoch": 0.6783199184066334, "grad_norm": 2.4051356315612793, "learning_rate": 2.476918233660142e-06, "loss": 0.7932, "step": 19204 }, { "epoch": 0.6783552402103413, "grad_norm": 1.730514407157898, "learning_rate": 2.476424409050016e-06, "loss": 0.741, "step": 19205 }, { "epoch": 0.6783905620140492, "grad_norm": 1.8706908226013184, "learning_rate": 2.4759306174679754e-06, "loss": 0.8227, "step": 19206 }, { "epoch": 0.6784258838177571, "grad_norm": 1.6476819515228271, "learning_rate": 2.4754368589204785e-06, "loss": 0.7796, "step": 19207 }, { "epoch": 0.6784612056214651, "grad_norm": 1.5338250398635864, "learning_rate": 2.4749431334139894e-06, "loss": 0.7474, "step": 19208 }, { "epoch": 0.678496527425173, "grad_norm": 1.6806482076644897, "learning_rate": 2.474449440954972e-06, "loss": 0.7902, "step": 19209 }, { "epoch": 0.6785318492288809, "grad_norm": 1.708154320716858, "learning_rate": 2.473955781549883e-06, "loss": 0.7708, "step": 19210 }, { "epoch": 0.6785671710325888, "grad_norm": 1.650195598602295, "learning_rate": 2.4734621552051867e-06, "loss": 0.79, "step": 19211 }, { "epoch": 0.6786024928362967, "grad_norm": 1.6350746154785156, "learning_rate": 2.472968561927344e-06, "loss": 0.7451, "step": 19212 }, { "epoch": 0.6786378146400046, "grad_norm": 1.6248940229415894, "learning_rate": 2.472475001722812e-06, "loss": 0.7488, "step": 19213 }, { "epoch": 0.6786731364437125, "grad_norm": 1.7171244621276855, "learning_rate": 2.4719814745980524e-06, "loss": 0.7718, "step": 19214 }, { "epoch": 0.6787084582474204, "grad_norm": 1.80289888381958, "learning_rate": 2.471487980559526e-06, "loss": 0.7644, "step": 19215 }, { "epoch": 0.6787437800511283, "grad_norm": 0.9858466386795044, "learning_rate": 2.470994519613689e-06, "loss": 0.5954, "step": 19216 }, { "epoch": 0.6787791018548363, "grad_norm": 1.6245335340499878, "learning_rate": 2.470501091766999e-06, "loss": 0.8002, "step": 19217 }, { "epoch": 0.6788144236585442, "grad_norm": 1.4741599559783936, "learning_rate": 2.4700076970259153e-06, "loss": 0.7603, "step": 19218 }, { "epoch": 0.6788497454622521, "grad_norm": 1.6587928533554077, "learning_rate": 2.4695143353968966e-06, "loss": 0.8134, "step": 19219 }, { "epoch": 0.67888506726596, "grad_norm": 1.6236172914505005, "learning_rate": 2.469021006886397e-06, "loss": 0.7695, "step": 19220 }, { "epoch": 0.6789203890696679, "grad_norm": 1.5267761945724487, "learning_rate": 2.4685277115008743e-06, "loss": 0.7688, "step": 19221 }, { "epoch": 0.6789557108733757, "grad_norm": 1.6572260856628418, "learning_rate": 2.468034449246788e-06, "loss": 0.7882, "step": 19222 }, { "epoch": 0.6789910326770836, "grad_norm": 1.9320151805877686, "learning_rate": 2.4675412201305876e-06, "loss": 0.8111, "step": 19223 }, { "epoch": 0.6790263544807915, "grad_norm": 1.6061732769012451, "learning_rate": 2.4670480241587323e-06, "loss": 0.7932, "step": 19224 }, { "epoch": 0.6790616762844994, "grad_norm": 1.5922130346298218, "learning_rate": 2.4665548613376784e-06, "loss": 0.7662, "step": 19225 }, { "epoch": 0.6790969980882073, "grad_norm": 1.8356256484985352, "learning_rate": 2.466061731673876e-06, "loss": 0.7976, "step": 19226 }, { "epoch": 0.6791323198919152, "grad_norm": 1.6409046649932861, "learning_rate": 2.465568635173782e-06, "loss": 0.7689, "step": 19227 }, { "epoch": 0.6791676416956232, "grad_norm": 1.5118374824523926, "learning_rate": 2.4650755718438506e-06, "loss": 0.7556, "step": 19228 }, { "epoch": 0.6792029634993311, "grad_norm": 1.5565072298049927, "learning_rate": 2.4645825416905315e-06, "loss": 0.7981, "step": 19229 }, { "epoch": 0.679238285303039, "grad_norm": 1.8200355768203735, "learning_rate": 2.46408954472028e-06, "loss": 0.7847, "step": 19230 }, { "epoch": 0.6792736071067469, "grad_norm": 1.7816245555877686, "learning_rate": 2.4635965809395496e-06, "loss": 0.7441, "step": 19231 }, { "epoch": 0.6793089289104548, "grad_norm": 1.4624782800674438, "learning_rate": 2.4631036503547883e-06, "loss": 0.7952, "step": 19232 }, { "epoch": 0.6793442507141627, "grad_norm": 1.6354259252548218, "learning_rate": 2.4626107529724497e-06, "loss": 0.7713, "step": 19233 }, { "epoch": 0.6793795725178706, "grad_norm": 2.104126453399658, "learning_rate": 2.462117888798986e-06, "loss": 0.7898, "step": 19234 }, { "epoch": 0.6794148943215785, "grad_norm": 1.6101278066635132, "learning_rate": 2.4616250578408467e-06, "loss": 0.7713, "step": 19235 }, { "epoch": 0.6794502161252864, "grad_norm": 1.7271795272827148, "learning_rate": 2.46113226010448e-06, "loss": 0.7823, "step": 19236 }, { "epoch": 0.6794855379289944, "grad_norm": 1.8280832767486572, "learning_rate": 2.4606394955963366e-06, "loss": 0.7947, "step": 19237 }, { "epoch": 0.6795208597327023, "grad_norm": 2.2068684101104736, "learning_rate": 2.4601467643228675e-06, "loss": 0.7784, "step": 19238 }, { "epoch": 0.6795561815364102, "grad_norm": 1.668726921081543, "learning_rate": 2.4596540662905196e-06, "loss": 0.774, "step": 19239 }, { "epoch": 0.6795915033401181, "grad_norm": 1.838079810142517, "learning_rate": 2.4591614015057407e-06, "loss": 0.7854, "step": 19240 }, { "epoch": 0.679626825143826, "grad_norm": 1.6378746032714844, "learning_rate": 2.458668769974982e-06, "loss": 0.7633, "step": 19241 }, { "epoch": 0.6796621469475339, "grad_norm": 1.6107070446014404, "learning_rate": 2.4581761717046866e-06, "loss": 0.7936, "step": 19242 }, { "epoch": 0.6796974687512418, "grad_norm": 1.6715956926345825, "learning_rate": 2.4576836067013033e-06, "loss": 0.7847, "step": 19243 }, { "epoch": 0.6797327905549497, "grad_norm": 1.7034244537353516, "learning_rate": 2.4571910749712816e-06, "loss": 0.7715, "step": 19244 }, { "epoch": 0.6797681123586576, "grad_norm": 2.0457816123962402, "learning_rate": 2.4566985765210634e-06, "loss": 0.8185, "step": 19245 }, { "epoch": 0.6798034341623655, "grad_norm": 1.8006757497787476, "learning_rate": 2.4562061113570952e-06, "loss": 0.7604, "step": 19246 }, { "epoch": 0.6798387559660735, "grad_norm": 1.8144675493240356, "learning_rate": 2.455713679485824e-06, "loss": 0.7716, "step": 19247 }, { "epoch": 0.6798740777697813, "grad_norm": 1.8685088157653809, "learning_rate": 2.4552212809136956e-06, "loss": 0.7757, "step": 19248 }, { "epoch": 0.6799093995734892, "grad_norm": 0.9333674311637878, "learning_rate": 2.4547289156471505e-06, "loss": 0.569, "step": 19249 }, { "epoch": 0.6799447213771971, "grad_norm": 1.9017040729522705, "learning_rate": 2.4542365836926353e-06, "loss": 0.7608, "step": 19250 }, { "epoch": 0.679980043180905, "grad_norm": 1.6936333179473877, "learning_rate": 2.4537442850565953e-06, "loss": 0.8199, "step": 19251 }, { "epoch": 0.6800153649846129, "grad_norm": 1.8161722421646118, "learning_rate": 2.453252019745469e-06, "loss": 0.7604, "step": 19252 }, { "epoch": 0.6800506867883208, "grad_norm": 1.4049128293991089, "learning_rate": 2.4527597877657034e-06, "loss": 0.716, "step": 19253 }, { "epoch": 0.6800860085920287, "grad_norm": 2.0824198722839355, "learning_rate": 2.452267589123739e-06, "loss": 0.7518, "step": 19254 }, { "epoch": 0.6801213303957366, "grad_norm": 1.5165396928787231, "learning_rate": 2.4517754238260154e-06, "loss": 0.7428, "step": 19255 }, { "epoch": 0.6801566521994445, "grad_norm": 1.5745829343795776, "learning_rate": 2.4512832918789765e-06, "loss": 0.7373, "step": 19256 }, { "epoch": 0.6801919740031525, "grad_norm": 1.6584036350250244, "learning_rate": 2.4507911932890627e-06, "loss": 0.7578, "step": 19257 }, { "epoch": 0.6802272958068604, "grad_norm": 1.7223812341690063, "learning_rate": 2.4502991280627163e-06, "loss": 0.7575, "step": 19258 }, { "epoch": 0.6802626176105683, "grad_norm": 1.6735397577285767, "learning_rate": 2.4498070962063734e-06, "loss": 0.7338, "step": 19259 }, { "epoch": 0.6802979394142762, "grad_norm": 3.9224865436553955, "learning_rate": 2.449315097726476e-06, "loss": 0.7561, "step": 19260 }, { "epoch": 0.6803332612179841, "grad_norm": 1.7674380540847778, "learning_rate": 2.4488231326294656e-06, "loss": 0.7651, "step": 19261 }, { "epoch": 0.680368583021692, "grad_norm": 1.6652857065200806, "learning_rate": 2.4483312009217764e-06, "loss": 0.7665, "step": 19262 }, { "epoch": 0.6804039048253999, "grad_norm": 1.7165076732635498, "learning_rate": 2.4478393026098486e-06, "loss": 0.7368, "step": 19263 }, { "epoch": 0.6804392266291078, "grad_norm": 1.731147050857544, "learning_rate": 2.4473474377001217e-06, "loss": 0.7875, "step": 19264 }, { "epoch": 0.6804745484328157, "grad_norm": 1.826982021331787, "learning_rate": 2.4468556061990302e-06, "loss": 0.8054, "step": 19265 }, { "epoch": 0.6805098702365237, "grad_norm": 1.6269617080688477, "learning_rate": 2.4463638081130124e-06, "loss": 0.7889, "step": 19266 }, { "epoch": 0.6805451920402316, "grad_norm": 1.5893536806106567, "learning_rate": 2.4458720434485074e-06, "loss": 0.7759, "step": 19267 }, { "epoch": 0.6805805138439395, "grad_norm": 1.836632251739502, "learning_rate": 2.4453803122119468e-06, "loss": 0.8072, "step": 19268 }, { "epoch": 0.6806158356476474, "grad_norm": 1.6801700592041016, "learning_rate": 2.4448886144097684e-06, "loss": 0.7718, "step": 19269 }, { "epoch": 0.6806511574513553, "grad_norm": 1.5597785711288452, "learning_rate": 2.444396950048409e-06, "loss": 0.7725, "step": 19270 }, { "epoch": 0.6806864792550632, "grad_norm": 1.6996288299560547, "learning_rate": 2.443905319134301e-06, "loss": 0.7997, "step": 19271 }, { "epoch": 0.6807218010587711, "grad_norm": 1.6310186386108398, "learning_rate": 2.443413721673881e-06, "loss": 0.7633, "step": 19272 }, { "epoch": 0.680757122862479, "grad_norm": 1.7597754001617432, "learning_rate": 2.4429221576735794e-06, "loss": 0.7664, "step": 19273 }, { "epoch": 0.6807924446661868, "grad_norm": 1.8414195775985718, "learning_rate": 2.4424306271398345e-06, "loss": 0.7954, "step": 19274 }, { "epoch": 0.6808277664698947, "grad_norm": 1.9676871299743652, "learning_rate": 2.4419391300790745e-06, "loss": 0.7903, "step": 19275 }, { "epoch": 0.6808630882736026, "grad_norm": 1.82174551486969, "learning_rate": 2.4414476664977343e-06, "loss": 0.7708, "step": 19276 }, { "epoch": 0.6808984100773106, "grad_norm": 2.7008957862854004, "learning_rate": 2.4409562364022484e-06, "loss": 0.7784, "step": 19277 }, { "epoch": 0.6809337318810185, "grad_norm": 1.7290583848953247, "learning_rate": 2.4404648397990437e-06, "loss": 0.7942, "step": 19278 }, { "epoch": 0.6809690536847264, "grad_norm": 1.730672836303711, "learning_rate": 2.439973476694555e-06, "loss": 0.7613, "step": 19279 }, { "epoch": 0.6810043754884343, "grad_norm": 1.9744004011154175, "learning_rate": 2.439482147095213e-06, "loss": 0.7678, "step": 19280 }, { "epoch": 0.6810396972921422, "grad_norm": 1.6142932176589966, "learning_rate": 2.438990851007446e-06, "loss": 0.7791, "step": 19281 }, { "epoch": 0.6810750190958501, "grad_norm": 1.7170414924621582, "learning_rate": 2.438499588437685e-06, "loss": 0.7937, "step": 19282 }, { "epoch": 0.681110340899558, "grad_norm": 0.8702330589294434, "learning_rate": 2.438008359392362e-06, "loss": 0.5946, "step": 19283 }, { "epoch": 0.6811456627032659, "grad_norm": 1.6677236557006836, "learning_rate": 2.4375171638779023e-06, "loss": 0.7846, "step": 19284 }, { "epoch": 0.6811809845069738, "grad_norm": 1.9103457927703857, "learning_rate": 2.437026001900736e-06, "loss": 0.7537, "step": 19285 }, { "epoch": 0.6812163063106818, "grad_norm": 1.7347780466079712, "learning_rate": 2.4365348734672933e-06, "loss": 0.7879, "step": 19286 }, { "epoch": 0.6812516281143897, "grad_norm": 1.6004903316497803, "learning_rate": 2.436043778583999e-06, "loss": 0.7782, "step": 19287 }, { "epoch": 0.6812869499180976, "grad_norm": 1.783028244972229, "learning_rate": 2.435552717257281e-06, "loss": 0.8143, "step": 19288 }, { "epoch": 0.6813222717218055, "grad_norm": 1.721198320388794, "learning_rate": 2.4350616894935695e-06, "loss": 0.7926, "step": 19289 }, { "epoch": 0.6813575935255134, "grad_norm": 2.012927770614624, "learning_rate": 2.434570695299288e-06, "loss": 0.8042, "step": 19290 }, { "epoch": 0.6813929153292213, "grad_norm": 1.7296624183654785, "learning_rate": 2.4340797346808613e-06, "loss": 0.8315, "step": 19291 }, { "epoch": 0.6814282371329292, "grad_norm": 1.640981674194336, "learning_rate": 2.4335888076447166e-06, "loss": 0.7709, "step": 19292 }, { "epoch": 0.6814635589366371, "grad_norm": 1.6242955923080444, "learning_rate": 2.4330979141972815e-06, "loss": 0.7554, "step": 19293 }, { "epoch": 0.681498880740345, "grad_norm": 1.6290584802627563, "learning_rate": 2.4326070543449764e-06, "loss": 0.7403, "step": 19294 }, { "epoch": 0.681534202544053, "grad_norm": 1.5164012908935547, "learning_rate": 2.4321162280942273e-06, "loss": 0.768, "step": 19295 }, { "epoch": 0.6815695243477609, "grad_norm": 1.5902597904205322, "learning_rate": 2.4316254354514603e-06, "loss": 0.7454, "step": 19296 }, { "epoch": 0.6816048461514688, "grad_norm": 1.6927975416183472, "learning_rate": 2.431134676423094e-06, "loss": 0.7887, "step": 19297 }, { "epoch": 0.6816401679551767, "grad_norm": 1.7402828931808472, "learning_rate": 2.4306439510155557e-06, "loss": 0.8003, "step": 19298 }, { "epoch": 0.6816754897588846, "grad_norm": 1.573570966720581, "learning_rate": 2.4301532592352672e-06, "loss": 0.7849, "step": 19299 }, { "epoch": 0.6817108115625924, "grad_norm": 1.645551323890686, "learning_rate": 2.4296626010886483e-06, "loss": 0.7629, "step": 19300 }, { "epoch": 0.6817461333663003, "grad_norm": 1.6301798820495605, "learning_rate": 2.429171976582122e-06, "loss": 0.7425, "step": 19301 }, { "epoch": 0.6817814551700082, "grad_norm": 1.74901282787323, "learning_rate": 2.4286813857221115e-06, "loss": 0.769, "step": 19302 }, { "epoch": 0.6818167769737161, "grad_norm": 1.8036956787109375, "learning_rate": 2.4281908285150334e-06, "loss": 0.7697, "step": 19303 }, { "epoch": 0.681852098777424, "grad_norm": 0.9241068363189697, "learning_rate": 2.427700304967311e-06, "loss": 0.6025, "step": 19304 }, { "epoch": 0.6818874205811319, "grad_norm": 1.6770187616348267, "learning_rate": 2.427209815085363e-06, "loss": 0.7912, "step": 19305 }, { "epoch": 0.6819227423848399, "grad_norm": 1.7169475555419922, "learning_rate": 2.4267193588756115e-06, "loss": 0.8185, "step": 19306 }, { "epoch": 0.6819580641885478, "grad_norm": 1.7844887971878052, "learning_rate": 2.4262289363444715e-06, "loss": 0.7916, "step": 19307 }, { "epoch": 0.6819933859922557, "grad_norm": 1.6668989658355713, "learning_rate": 2.425738547498363e-06, "loss": 0.7828, "step": 19308 }, { "epoch": 0.6820287077959636, "grad_norm": 1.7995805740356445, "learning_rate": 2.425248192343708e-06, "loss": 0.7709, "step": 19309 }, { "epoch": 0.6820640295996715, "grad_norm": 1.5379643440246582, "learning_rate": 2.4247578708869175e-06, "loss": 0.7662, "step": 19310 }, { "epoch": 0.6820993514033794, "grad_norm": 1.5667804479599, "learning_rate": 2.4242675831344115e-06, "loss": 0.7564, "step": 19311 }, { "epoch": 0.6821346732070873, "grad_norm": 1.7594152688980103, "learning_rate": 2.4237773290926093e-06, "loss": 0.7866, "step": 19312 }, { "epoch": 0.6821699950107952, "grad_norm": 1.6552441120147705, "learning_rate": 2.4232871087679234e-06, "loss": 0.752, "step": 19313 }, { "epoch": 0.6822053168145031, "grad_norm": 1.571641445159912, "learning_rate": 2.4227969221667714e-06, "loss": 0.7542, "step": 19314 }, { "epoch": 0.682240638618211, "grad_norm": 1.5896306037902832, "learning_rate": 2.4223067692955683e-06, "loss": 0.7333, "step": 19315 }, { "epoch": 0.682275960421919, "grad_norm": 1.7153172492980957, "learning_rate": 2.421816650160732e-06, "loss": 0.7721, "step": 19316 }, { "epoch": 0.6823112822256269, "grad_norm": 1.9046635627746582, "learning_rate": 2.421326564768673e-06, "loss": 0.752, "step": 19317 }, { "epoch": 0.6823466040293348, "grad_norm": 1.6242358684539795, "learning_rate": 2.4208365131258064e-06, "loss": 0.7476, "step": 19318 }, { "epoch": 0.6823819258330427, "grad_norm": 1.583600401878357, "learning_rate": 2.420346495238549e-06, "loss": 0.6983, "step": 19319 }, { "epoch": 0.6824172476367506, "grad_norm": 2.0413711071014404, "learning_rate": 2.4198565111133104e-06, "loss": 0.7581, "step": 19320 }, { "epoch": 0.6824525694404585, "grad_norm": 1.6521347761154175, "learning_rate": 2.419366560756504e-06, "loss": 0.7818, "step": 19321 }, { "epoch": 0.6824878912441664, "grad_norm": 2.137022018432617, "learning_rate": 2.4188766441745453e-06, "loss": 0.7656, "step": 19322 }, { "epoch": 0.6825232130478743, "grad_norm": 1.6020299196243286, "learning_rate": 2.418386761373842e-06, "loss": 0.7655, "step": 19323 }, { "epoch": 0.6825585348515822, "grad_norm": 1.8223698139190674, "learning_rate": 2.4178969123608072e-06, "loss": 0.7909, "step": 19324 }, { "epoch": 0.6825938566552902, "grad_norm": 1.6960505247116089, "learning_rate": 2.4174070971418545e-06, "loss": 0.7573, "step": 19325 }, { "epoch": 0.682629178458998, "grad_norm": 1.795056939125061, "learning_rate": 2.4169173157233898e-06, "loss": 0.7828, "step": 19326 }, { "epoch": 0.6826645002627059, "grad_norm": 1.6840078830718994, "learning_rate": 2.416427568111828e-06, "loss": 0.7584, "step": 19327 }, { "epoch": 0.6826998220664138, "grad_norm": 1.7964653968811035, "learning_rate": 2.415937854313575e-06, "loss": 0.7213, "step": 19328 }, { "epoch": 0.6827351438701217, "grad_norm": 1.7687839269638062, "learning_rate": 2.415448174335043e-06, "loss": 0.788, "step": 19329 }, { "epoch": 0.6827704656738296, "grad_norm": 1.7505779266357422, "learning_rate": 2.414958528182637e-06, "loss": 0.813, "step": 19330 }, { "epoch": 0.6828057874775375, "grad_norm": 1.6585543155670166, "learning_rate": 2.4144689158627686e-06, "loss": 0.8375, "step": 19331 }, { "epoch": 0.6828411092812454, "grad_norm": 1.5992240905761719, "learning_rate": 2.4139793373818467e-06, "loss": 0.7764, "step": 19332 }, { "epoch": 0.6828764310849533, "grad_norm": 1.7347524166107178, "learning_rate": 2.413489792746275e-06, "loss": 0.7376, "step": 19333 }, { "epoch": 0.6829117528886612, "grad_norm": 1.6699267625808716, "learning_rate": 2.4130002819624633e-06, "loss": 0.7795, "step": 19334 }, { "epoch": 0.6829470746923691, "grad_norm": 0.998394250869751, "learning_rate": 2.412510805036819e-06, "loss": 0.589, "step": 19335 }, { "epoch": 0.6829823964960771, "grad_norm": 1.647106647491455, "learning_rate": 2.4120213619757454e-06, "loss": 0.7726, "step": 19336 }, { "epoch": 0.683017718299785, "grad_norm": 1.7709598541259766, "learning_rate": 2.4115319527856497e-06, "loss": 0.8077, "step": 19337 }, { "epoch": 0.6830530401034929, "grad_norm": 1.6858412027359009, "learning_rate": 2.4110425774729384e-06, "loss": 0.826, "step": 19338 }, { "epoch": 0.6830883619072008, "grad_norm": 1.7116228342056274, "learning_rate": 2.4105532360440136e-06, "loss": 0.7927, "step": 19339 }, { "epoch": 0.6831236837109087, "grad_norm": 1.669873595237732, "learning_rate": 2.410063928505282e-06, "loss": 0.7673, "step": 19340 }, { "epoch": 0.6831590055146166, "grad_norm": 0.9349522590637207, "learning_rate": 2.409574654863148e-06, "loss": 0.5857, "step": 19341 }, { "epoch": 0.6831943273183245, "grad_norm": 1.7385178804397583, "learning_rate": 2.4090854151240125e-06, "loss": 0.7562, "step": 19342 }, { "epoch": 0.6832296491220324, "grad_norm": 1.641993522644043, "learning_rate": 2.4085962092942794e-06, "loss": 0.7798, "step": 19343 }, { "epoch": 0.6832649709257403, "grad_norm": 1.7477909326553345, "learning_rate": 2.4081070373803545e-06, "loss": 0.7942, "step": 19344 }, { "epoch": 0.6833002927294483, "grad_norm": 1.7058089971542358, "learning_rate": 2.407617899388635e-06, "loss": 0.7766, "step": 19345 }, { "epoch": 0.6833356145331562, "grad_norm": 1.8356044292449951, "learning_rate": 2.407128795325528e-06, "loss": 0.777, "step": 19346 }, { "epoch": 0.6833709363368641, "grad_norm": 1.6681081056594849, "learning_rate": 2.4066397251974294e-06, "loss": 0.7692, "step": 19347 }, { "epoch": 0.683406258140572, "grad_norm": 1.6144853830337524, "learning_rate": 2.4061506890107443e-06, "loss": 0.7514, "step": 19348 }, { "epoch": 0.6834415799442799, "grad_norm": 1.6676174402236938, "learning_rate": 2.4056616867718696e-06, "loss": 0.8, "step": 19349 }, { "epoch": 0.6834769017479878, "grad_norm": 1.6889389753341675, "learning_rate": 2.405172718487207e-06, "loss": 0.7199, "step": 19350 }, { "epoch": 0.6835122235516957, "grad_norm": 1.9394103288650513, "learning_rate": 2.404683784163158e-06, "loss": 0.7686, "step": 19351 }, { "epoch": 0.6835475453554035, "grad_norm": 1.773969292640686, "learning_rate": 2.404194883806118e-06, "loss": 0.7885, "step": 19352 }, { "epoch": 0.6835828671591114, "grad_norm": 1.782734751701355, "learning_rate": 2.4037060174224874e-06, "loss": 0.7758, "step": 19353 }, { "epoch": 0.6836181889628193, "grad_norm": 1.8804261684417725, "learning_rate": 2.4032171850186666e-06, "loss": 0.7991, "step": 19354 }, { "epoch": 0.6836535107665273, "grad_norm": 1.8260232210159302, "learning_rate": 2.402728386601049e-06, "loss": 0.7878, "step": 19355 }, { "epoch": 0.6836888325702352, "grad_norm": 1.745274543762207, "learning_rate": 2.402239622176034e-06, "loss": 0.7733, "step": 19356 }, { "epoch": 0.6837241543739431, "grad_norm": 1.639773964881897, "learning_rate": 2.4017508917500204e-06, "loss": 0.7482, "step": 19357 }, { "epoch": 0.683759476177651, "grad_norm": 1.839113473892212, "learning_rate": 2.401262195329401e-06, "loss": 0.7941, "step": 19358 }, { "epoch": 0.6837947979813589, "grad_norm": 1.8744791746139526, "learning_rate": 2.400773532920573e-06, "loss": 0.7763, "step": 19359 }, { "epoch": 0.6838301197850668, "grad_norm": 1.868106722831726, "learning_rate": 2.4002849045299343e-06, "loss": 0.7884, "step": 19360 }, { "epoch": 0.6838654415887747, "grad_norm": 1.695124864578247, "learning_rate": 2.3997963101638766e-06, "loss": 0.7612, "step": 19361 }, { "epoch": 0.6839007633924826, "grad_norm": 1.6837209463119507, "learning_rate": 2.399307749828796e-06, "loss": 0.7902, "step": 19362 }, { "epoch": 0.6839360851961905, "grad_norm": 1.7001994848251343, "learning_rate": 2.3988192235310865e-06, "loss": 0.7698, "step": 19363 }, { "epoch": 0.6839714069998984, "grad_norm": 1.8052023649215698, "learning_rate": 2.398330731277146e-06, "loss": 0.7977, "step": 19364 }, { "epoch": 0.6840067288036064, "grad_norm": 1.555586814880371, "learning_rate": 2.39784227307336e-06, "loss": 0.7298, "step": 19365 }, { "epoch": 0.6840420506073143, "grad_norm": 1.6922674179077148, "learning_rate": 2.397353848926125e-06, "loss": 0.7633, "step": 19366 }, { "epoch": 0.6840773724110222, "grad_norm": 1.8425145149230957, "learning_rate": 2.3968654588418357e-06, "loss": 0.7487, "step": 19367 }, { "epoch": 0.6841126942147301, "grad_norm": 1.8179900646209717, "learning_rate": 2.3963771028268797e-06, "loss": 0.7649, "step": 19368 }, { "epoch": 0.684148016018438, "grad_norm": 1.7504030466079712, "learning_rate": 2.395888780887651e-06, "loss": 0.7369, "step": 19369 }, { "epoch": 0.6841833378221459, "grad_norm": 1.7182233333587646, "learning_rate": 2.395400493030542e-06, "loss": 0.796, "step": 19370 }, { "epoch": 0.6842186596258538, "grad_norm": 1.7867612838745117, "learning_rate": 2.39491223926194e-06, "loss": 0.7391, "step": 19371 }, { "epoch": 0.6842539814295617, "grad_norm": 1.609370231628418, "learning_rate": 2.394424019588237e-06, "loss": 0.766, "step": 19372 }, { "epoch": 0.6842893032332696, "grad_norm": 1.871398687362671, "learning_rate": 2.3939358340158232e-06, "loss": 0.7921, "step": 19373 }, { "epoch": 0.6843246250369776, "grad_norm": 1.5520281791687012, "learning_rate": 2.3934476825510883e-06, "loss": 0.7743, "step": 19374 }, { "epoch": 0.6843599468406855, "grad_norm": 1.6651691198349, "learning_rate": 2.3929595652004185e-06, "loss": 0.7779, "step": 19375 }, { "epoch": 0.6843952686443934, "grad_norm": 1.5330069065093994, "learning_rate": 2.392471481970204e-06, "loss": 0.7373, "step": 19376 }, { "epoch": 0.6844305904481013, "grad_norm": 1.5899149179458618, "learning_rate": 2.3919834328668346e-06, "loss": 0.7627, "step": 19377 }, { "epoch": 0.6844659122518091, "grad_norm": 1.9138737916946411, "learning_rate": 2.3914954178966937e-06, "loss": 0.7762, "step": 19378 }, { "epoch": 0.684501234055517, "grad_norm": 1.6805243492126465, "learning_rate": 2.39100743706617e-06, "loss": 0.7967, "step": 19379 }, { "epoch": 0.6845365558592249, "grad_norm": 1.9636485576629639, "learning_rate": 2.390519490381653e-06, "loss": 0.7614, "step": 19380 }, { "epoch": 0.6845718776629328, "grad_norm": 1.693897008895874, "learning_rate": 2.3900315778495246e-06, "loss": 0.7892, "step": 19381 }, { "epoch": 0.6846071994666407, "grad_norm": 1.7291841506958008, "learning_rate": 2.3895436994761722e-06, "loss": 0.7626, "step": 19382 }, { "epoch": 0.6846425212703486, "grad_norm": 2.2435882091522217, "learning_rate": 2.3890558552679828e-06, "loss": 0.7997, "step": 19383 }, { "epoch": 0.6846778430740565, "grad_norm": 1.6150447130203247, "learning_rate": 2.38856804523134e-06, "loss": 0.7347, "step": 19384 }, { "epoch": 0.6847131648777645, "grad_norm": 1.657986044883728, "learning_rate": 2.388080269372625e-06, "loss": 0.7502, "step": 19385 }, { "epoch": 0.6847484866814724, "grad_norm": 1.688469409942627, "learning_rate": 2.387592527698225e-06, "loss": 0.7863, "step": 19386 }, { "epoch": 0.6847838084851803, "grad_norm": 1.8316351175308228, "learning_rate": 2.3871048202145247e-06, "loss": 0.7736, "step": 19387 }, { "epoch": 0.6848191302888882, "grad_norm": 1.7920984029769897, "learning_rate": 2.3866171469279037e-06, "loss": 0.7619, "step": 19388 }, { "epoch": 0.6848544520925961, "grad_norm": 1.5759105682373047, "learning_rate": 2.386129507844746e-06, "loss": 0.7635, "step": 19389 }, { "epoch": 0.684889773896304, "grad_norm": 1.5943403244018555, "learning_rate": 2.3856419029714357e-06, "loss": 0.797, "step": 19390 }, { "epoch": 0.6849250957000119, "grad_norm": 1.676147699356079, "learning_rate": 2.3851543323143513e-06, "loss": 0.7851, "step": 19391 }, { "epoch": 0.6849604175037198, "grad_norm": 1.6352618932724, "learning_rate": 2.3846667958798747e-06, "loss": 0.7897, "step": 19392 }, { "epoch": 0.6849957393074277, "grad_norm": 1.6292705535888672, "learning_rate": 2.38417929367439e-06, "loss": 0.7722, "step": 19393 }, { "epoch": 0.6850310611111357, "grad_norm": 1.7643442153930664, "learning_rate": 2.3836918257042728e-06, "loss": 0.8002, "step": 19394 }, { "epoch": 0.6850663829148436, "grad_norm": 1.5148322582244873, "learning_rate": 2.383204391975905e-06, "loss": 0.7845, "step": 19395 }, { "epoch": 0.6851017047185515, "grad_norm": 1.7728642225265503, "learning_rate": 2.3827169924956687e-06, "loss": 0.7628, "step": 19396 }, { "epoch": 0.6851370265222594, "grad_norm": 1.8081499338150024, "learning_rate": 2.382229627269938e-06, "loss": 0.7896, "step": 19397 }, { "epoch": 0.6851723483259673, "grad_norm": 1.7495499849319458, "learning_rate": 2.381742296305094e-06, "loss": 0.807, "step": 19398 }, { "epoch": 0.6852076701296752, "grad_norm": 1.648849368095398, "learning_rate": 2.381254999607517e-06, "loss": 0.7819, "step": 19399 }, { "epoch": 0.6852429919333831, "grad_norm": 1.6182711124420166, "learning_rate": 2.38076773718358e-06, "loss": 0.7646, "step": 19400 }, { "epoch": 0.685278313737091, "grad_norm": 1.6887511014938354, "learning_rate": 2.3802805090396642e-06, "loss": 0.75, "step": 19401 }, { "epoch": 0.6853136355407989, "grad_norm": 1.6201674938201904, "learning_rate": 2.3797933151821426e-06, "loss": 0.7448, "step": 19402 }, { "epoch": 0.6853489573445068, "grad_norm": 1.687314748764038, "learning_rate": 2.3793061556173958e-06, "loss": 0.7681, "step": 19403 }, { "epoch": 0.6853842791482148, "grad_norm": 1.6989401578903198, "learning_rate": 2.3788190303517954e-06, "loss": 0.7606, "step": 19404 }, { "epoch": 0.6854196009519226, "grad_norm": 1.5733461380004883, "learning_rate": 2.3783319393917194e-06, "loss": 0.7642, "step": 19405 }, { "epoch": 0.6854549227556305, "grad_norm": 1.617395281791687, "learning_rate": 2.3778448827435428e-06, "loss": 0.7815, "step": 19406 }, { "epoch": 0.6854902445593384, "grad_norm": 1.5771291255950928, "learning_rate": 2.3773578604136387e-06, "loss": 0.7662, "step": 19407 }, { "epoch": 0.6855255663630463, "grad_norm": 1.809271216392517, "learning_rate": 2.3768708724083815e-06, "loss": 0.7827, "step": 19408 }, { "epoch": 0.6855608881667542, "grad_norm": 0.9419854879379272, "learning_rate": 2.376383918734147e-06, "loss": 0.573, "step": 19409 }, { "epoch": 0.6855962099704621, "grad_norm": 1.7314263582229614, "learning_rate": 2.375896999397305e-06, "loss": 0.7828, "step": 19410 }, { "epoch": 0.68563153177417, "grad_norm": 1.696527123451233, "learning_rate": 2.3754101144042297e-06, "loss": 0.8029, "step": 19411 }, { "epoch": 0.6856668535778779, "grad_norm": 1.5328367948532104, "learning_rate": 2.374923263761295e-06, "loss": 0.7432, "step": 19412 }, { "epoch": 0.6857021753815858, "grad_norm": 1.8290457725524902, "learning_rate": 2.3744364474748694e-06, "loss": 0.8084, "step": 19413 }, { "epoch": 0.6857374971852938, "grad_norm": 2.762577533721924, "learning_rate": 2.373949665551326e-06, "loss": 0.7803, "step": 19414 }, { "epoch": 0.6857728189890017, "grad_norm": 1.647125244140625, "learning_rate": 2.373462917997038e-06, "loss": 0.7712, "step": 19415 }, { "epoch": 0.6858081407927096, "grad_norm": 1.7335412502288818, "learning_rate": 2.3729762048183715e-06, "loss": 0.7779, "step": 19416 }, { "epoch": 0.6858434625964175, "grad_norm": 1.65938138961792, "learning_rate": 2.3724895260216985e-06, "loss": 0.759, "step": 19417 }, { "epoch": 0.6858787844001254, "grad_norm": 1.7939391136169434, "learning_rate": 2.3720028816133904e-06, "loss": 0.7622, "step": 19418 }, { "epoch": 0.6859141062038333, "grad_norm": 1.659002661705017, "learning_rate": 2.3715162715998153e-06, "loss": 0.7755, "step": 19419 }, { "epoch": 0.6859494280075412, "grad_norm": 1.943458080291748, "learning_rate": 2.3710296959873385e-06, "loss": 0.7862, "step": 19420 }, { "epoch": 0.6859847498112491, "grad_norm": 1.7241594791412354, "learning_rate": 2.370543154782331e-06, "loss": 0.767, "step": 19421 }, { "epoch": 0.686020071614957, "grad_norm": 1.6737130880355835, "learning_rate": 2.3700566479911623e-06, "loss": 0.7797, "step": 19422 }, { "epoch": 0.686055393418665, "grad_norm": 1.8166825771331787, "learning_rate": 2.369570175620196e-06, "loss": 0.7404, "step": 19423 }, { "epoch": 0.6860907152223729, "grad_norm": 1.7331568002700806, "learning_rate": 2.3690837376758007e-06, "loss": 0.7845, "step": 19424 }, { "epoch": 0.6861260370260808, "grad_norm": 1.7980477809906006, "learning_rate": 2.3685973341643455e-06, "loss": 0.8006, "step": 19425 }, { "epoch": 0.6861613588297887, "grad_norm": 1.6734087467193604, "learning_rate": 2.3681109650921907e-06, "loss": 0.8202, "step": 19426 }, { "epoch": 0.6861966806334966, "grad_norm": 1.749619483947754, "learning_rate": 2.3676246304657057e-06, "loss": 0.7533, "step": 19427 }, { "epoch": 0.6862320024372045, "grad_norm": 1.7634998559951782, "learning_rate": 2.3671383302912558e-06, "loss": 0.7745, "step": 19428 }, { "epoch": 0.6862673242409124, "grad_norm": 1.7202939987182617, "learning_rate": 2.3666520645752035e-06, "loss": 0.7707, "step": 19429 }, { "epoch": 0.6863026460446203, "grad_norm": 1.7326005697250366, "learning_rate": 2.3661658333239134e-06, "loss": 0.7804, "step": 19430 }, { "epoch": 0.6863379678483281, "grad_norm": 1.8211876153945923, "learning_rate": 2.36567963654375e-06, "loss": 0.8013, "step": 19431 }, { "epoch": 0.686373289652036, "grad_norm": 1.7706025838851929, "learning_rate": 2.3651934742410784e-06, "loss": 0.7653, "step": 19432 }, { "epoch": 0.6864086114557439, "grad_norm": 2.001021385192871, "learning_rate": 2.364707346422257e-06, "loss": 0.8071, "step": 19433 }, { "epoch": 0.6864439332594519, "grad_norm": 1.6885701417922974, "learning_rate": 2.36422125309365e-06, "loss": 0.7507, "step": 19434 }, { "epoch": 0.6864792550631598, "grad_norm": 1.8397115468978882, "learning_rate": 2.3637351942616225e-06, "loss": 0.7869, "step": 19435 }, { "epoch": 0.6865145768668677, "grad_norm": 1.7919670343399048, "learning_rate": 2.3632491699325317e-06, "loss": 0.7441, "step": 19436 }, { "epoch": 0.6865498986705756, "grad_norm": 2.0338964462280273, "learning_rate": 2.362763180112739e-06, "loss": 0.7719, "step": 19437 }, { "epoch": 0.6865852204742835, "grad_norm": 1.7428956031799316, "learning_rate": 2.3622772248086105e-06, "loss": 0.7784, "step": 19438 }, { "epoch": 0.6866205422779914, "grad_norm": 1.7706645727157593, "learning_rate": 2.361791304026498e-06, "loss": 0.7772, "step": 19439 }, { "epoch": 0.6866558640816993, "grad_norm": 1.7425042390823364, "learning_rate": 2.3613054177727653e-06, "loss": 0.7696, "step": 19440 }, { "epoch": 0.6866911858854072, "grad_norm": 1.5592561960220337, "learning_rate": 2.3608195660537704e-06, "loss": 0.818, "step": 19441 }, { "epoch": 0.6867265076891151, "grad_norm": 1.6378469467163086, "learning_rate": 2.3603337488758754e-06, "loss": 0.7869, "step": 19442 }, { "epoch": 0.686761829492823, "grad_norm": 1.7426624298095703, "learning_rate": 2.3598479662454337e-06, "loss": 0.8076, "step": 19443 }, { "epoch": 0.686797151296531, "grad_norm": 3.1936776638031006, "learning_rate": 2.3593622181688062e-06, "loss": 0.7799, "step": 19444 }, { "epoch": 0.6868324731002389, "grad_norm": 1.7478114366531372, "learning_rate": 2.3588765046523508e-06, "loss": 0.7807, "step": 19445 }, { "epoch": 0.6868677949039468, "grad_norm": 1.7066938877105713, "learning_rate": 2.3583908257024218e-06, "loss": 0.7434, "step": 19446 }, { "epoch": 0.6869031167076547, "grad_norm": 1.6308066844940186, "learning_rate": 2.357905181325377e-06, "loss": 0.7577, "step": 19447 }, { "epoch": 0.6869384385113626, "grad_norm": 4.771589279174805, "learning_rate": 2.357419571527575e-06, "loss": 0.8081, "step": 19448 }, { "epoch": 0.6869737603150705, "grad_norm": 1.5567930936813354, "learning_rate": 2.3569339963153664e-06, "loss": 0.7622, "step": 19449 }, { "epoch": 0.6870090821187784, "grad_norm": 1.7368881702423096, "learning_rate": 2.356448455695109e-06, "loss": 0.783, "step": 19450 }, { "epoch": 0.6870444039224863, "grad_norm": 1.9122314453125, "learning_rate": 2.355962949673159e-06, "loss": 0.773, "step": 19451 }, { "epoch": 0.6870797257261942, "grad_norm": 1.623146891593933, "learning_rate": 2.3554774782558666e-06, "loss": 0.7677, "step": 19452 }, { "epoch": 0.6871150475299022, "grad_norm": 1.8211596012115479, "learning_rate": 2.3549920414495887e-06, "loss": 0.7738, "step": 19453 }, { "epoch": 0.6871503693336101, "grad_norm": 1.8348034620285034, "learning_rate": 2.354506639260679e-06, "loss": 0.7849, "step": 19454 }, { "epoch": 0.687185691137318, "grad_norm": 1.7688875198364258, "learning_rate": 2.3540212716954873e-06, "loss": 0.7775, "step": 19455 }, { "epoch": 0.6872210129410259, "grad_norm": 1.5686837434768677, "learning_rate": 2.35353593876037e-06, "loss": 0.7919, "step": 19456 }, { "epoch": 0.6872563347447337, "grad_norm": 1.9443295001983643, "learning_rate": 2.353050640461675e-06, "loss": 0.8061, "step": 19457 }, { "epoch": 0.6872916565484416, "grad_norm": 1.6577147245407104, "learning_rate": 2.352565376805757e-06, "loss": 0.8009, "step": 19458 }, { "epoch": 0.6873269783521495, "grad_norm": 1.6812175512313843, "learning_rate": 2.3520801477989642e-06, "loss": 0.7519, "step": 19459 }, { "epoch": 0.6873623001558574, "grad_norm": 1.6764607429504395, "learning_rate": 2.3515949534476483e-06, "loss": 0.7573, "step": 19460 }, { "epoch": 0.6873976219595653, "grad_norm": 1.6390714645385742, "learning_rate": 2.3511097937581623e-06, "loss": 0.7676, "step": 19461 }, { "epoch": 0.6874329437632732, "grad_norm": 2.0390396118164062, "learning_rate": 2.350624668736851e-06, "loss": 0.7505, "step": 19462 }, { "epoch": 0.6874682655669812, "grad_norm": 1.7564350366592407, "learning_rate": 2.3501395783900667e-06, "loss": 0.7897, "step": 19463 }, { "epoch": 0.6875035873706891, "grad_norm": 1.4864728450775146, "learning_rate": 2.3496545227241587e-06, "loss": 0.7278, "step": 19464 }, { "epoch": 0.687538909174397, "grad_norm": 1.6655625104904175, "learning_rate": 2.3491695017454725e-06, "loss": 0.7812, "step": 19465 }, { "epoch": 0.6875742309781049, "grad_norm": 1.7521488666534424, "learning_rate": 2.3486845154603572e-06, "loss": 0.7674, "step": 19466 }, { "epoch": 0.6876095527818128, "grad_norm": 1.643610954284668, "learning_rate": 2.3481995638751625e-06, "loss": 0.7901, "step": 19467 }, { "epoch": 0.6876448745855207, "grad_norm": 1.561661720275879, "learning_rate": 2.347714646996232e-06, "loss": 0.7834, "step": 19468 }, { "epoch": 0.6876801963892286, "grad_norm": 1.5142520666122437, "learning_rate": 2.347229764829913e-06, "loss": 0.7612, "step": 19469 }, { "epoch": 0.6877155181929365, "grad_norm": 2.1521694660186768, "learning_rate": 2.3467449173825545e-06, "loss": 0.8014, "step": 19470 }, { "epoch": 0.6877508399966444, "grad_norm": 1.686470866203308, "learning_rate": 2.3462601046604977e-06, "loss": 0.7977, "step": 19471 }, { "epoch": 0.6877861618003523, "grad_norm": 1.8937933444976807, "learning_rate": 2.34577532667009e-06, "loss": 0.7609, "step": 19472 }, { "epoch": 0.6878214836040603, "grad_norm": 1.7526323795318604, "learning_rate": 2.3452905834176775e-06, "loss": 0.772, "step": 19473 }, { "epoch": 0.6878568054077682, "grad_norm": 1.918696403503418, "learning_rate": 2.3448058749096015e-06, "loss": 0.7624, "step": 19474 }, { "epoch": 0.6878921272114761, "grad_norm": 1.6427003145217896, "learning_rate": 2.3443212011522083e-06, "loss": 0.7768, "step": 19475 }, { "epoch": 0.687927449015184, "grad_norm": 1.6674377918243408, "learning_rate": 2.3438365621518385e-06, "loss": 0.7422, "step": 19476 }, { "epoch": 0.6879627708188919, "grad_norm": 1.5676485300064087, "learning_rate": 2.3433519579148383e-06, "loss": 0.8087, "step": 19477 }, { "epoch": 0.6879980926225998, "grad_norm": 1.7867307662963867, "learning_rate": 2.3428673884475463e-06, "loss": 0.7956, "step": 19478 }, { "epoch": 0.6880334144263077, "grad_norm": 1.7096576690673828, "learning_rate": 2.3423828537563066e-06, "loss": 0.7402, "step": 19479 }, { "epoch": 0.6880687362300156, "grad_norm": 1.714539647102356, "learning_rate": 2.3418983538474622e-06, "loss": 0.768, "step": 19480 }, { "epoch": 0.6881040580337235, "grad_norm": 1.8683907985687256, "learning_rate": 2.3414138887273507e-06, "loss": 0.7962, "step": 19481 }, { "epoch": 0.6881393798374315, "grad_norm": 1.8227601051330566, "learning_rate": 2.3409294584023146e-06, "loss": 0.7531, "step": 19482 }, { "epoch": 0.6881747016411393, "grad_norm": 1.5431058406829834, "learning_rate": 2.3404450628786953e-06, "loss": 0.7781, "step": 19483 }, { "epoch": 0.6882100234448472, "grad_norm": 1.674146056175232, "learning_rate": 2.3399607021628294e-06, "loss": 0.7717, "step": 19484 }, { "epoch": 0.6882453452485551, "grad_norm": 1.6292380094528198, "learning_rate": 2.3394763762610577e-06, "loss": 0.781, "step": 19485 }, { "epoch": 0.688280667052263, "grad_norm": 1.7747832536697388, "learning_rate": 2.3389920851797217e-06, "loss": 0.7788, "step": 19486 }, { "epoch": 0.6883159888559709, "grad_norm": 1.9169564247131348, "learning_rate": 2.3385078289251545e-06, "loss": 0.791, "step": 19487 }, { "epoch": 0.6883513106596788, "grad_norm": 1.0655837059020996, "learning_rate": 2.3380236075036967e-06, "loss": 0.5904, "step": 19488 }, { "epoch": 0.6883866324633867, "grad_norm": 1.913353681564331, "learning_rate": 2.3375394209216857e-06, "loss": 0.7684, "step": 19489 }, { "epoch": 0.6884219542670946, "grad_norm": 0.9427186250686646, "learning_rate": 2.33705526918546e-06, "loss": 0.5481, "step": 19490 }, { "epoch": 0.6884572760708025, "grad_norm": 1.616226077079773, "learning_rate": 2.3365711523013525e-06, "loss": 0.7565, "step": 19491 }, { "epoch": 0.6884925978745104, "grad_norm": 1.7222671508789062, "learning_rate": 2.3360870702757006e-06, "loss": 0.7843, "step": 19492 }, { "epoch": 0.6885279196782184, "grad_norm": 1.570910096168518, "learning_rate": 2.3356030231148447e-06, "loss": 0.7675, "step": 19493 }, { "epoch": 0.6885632414819263, "grad_norm": 1.7262359857559204, "learning_rate": 2.3351190108251113e-06, "loss": 0.7949, "step": 19494 }, { "epoch": 0.6885985632856342, "grad_norm": 1.645819067955017, "learning_rate": 2.3346350334128396e-06, "loss": 0.8092, "step": 19495 }, { "epoch": 0.6886338850893421, "grad_norm": 1.730208396911621, "learning_rate": 2.334151090884366e-06, "loss": 0.749, "step": 19496 }, { "epoch": 0.68866920689305, "grad_norm": 1.8711315393447876, "learning_rate": 2.3336671832460193e-06, "loss": 0.793, "step": 19497 }, { "epoch": 0.6887045286967579, "grad_norm": 1.7162941694259644, "learning_rate": 2.3331833105041356e-06, "loss": 0.7897, "step": 19498 }, { "epoch": 0.6887398505004658, "grad_norm": 1.5837035179138184, "learning_rate": 2.332699472665047e-06, "loss": 0.7724, "step": 19499 }, { "epoch": 0.6887751723041737, "grad_norm": 2.1141953468322754, "learning_rate": 2.332215669735089e-06, "loss": 0.7677, "step": 19500 }, { "epoch": 0.6888104941078816, "grad_norm": 1.8998357057571411, "learning_rate": 2.331731901720588e-06, "loss": 0.7516, "step": 19501 }, { "epoch": 0.6888458159115896, "grad_norm": 1.6934298276901245, "learning_rate": 2.331248168627879e-06, "loss": 0.7416, "step": 19502 }, { "epoch": 0.6888811377152975, "grad_norm": 1.5141322612762451, "learning_rate": 2.330764470463294e-06, "loss": 0.7677, "step": 19503 }, { "epoch": 0.6889164595190054, "grad_norm": 1.6089941263198853, "learning_rate": 2.3302808072331597e-06, "loss": 0.7381, "step": 19504 }, { "epoch": 0.6889517813227133, "grad_norm": 1.7218369245529175, "learning_rate": 2.3297971789438084e-06, "loss": 0.7792, "step": 19505 }, { "epoch": 0.6889871031264212, "grad_norm": 2.0127766132354736, "learning_rate": 2.329313585601571e-06, "loss": 0.7915, "step": 19506 }, { "epoch": 0.6890224249301291, "grad_norm": 1.5872266292572021, "learning_rate": 2.3288300272127745e-06, "loss": 0.7528, "step": 19507 }, { "epoch": 0.689057746733837, "grad_norm": 1.6226320266723633, "learning_rate": 2.328346503783748e-06, "loss": 0.7784, "step": 19508 }, { "epoch": 0.6890930685375448, "grad_norm": 1.673279047012329, "learning_rate": 2.3278630153208226e-06, "loss": 0.7774, "step": 19509 }, { "epoch": 0.6891283903412527, "grad_norm": 1.6761338710784912, "learning_rate": 2.3273795618303212e-06, "loss": 0.7658, "step": 19510 }, { "epoch": 0.6891637121449606, "grad_norm": 1.628112554550171, "learning_rate": 2.3268961433185744e-06, "loss": 0.7825, "step": 19511 }, { "epoch": 0.6891990339486685, "grad_norm": 1.6987576484680176, "learning_rate": 2.3264127597919094e-06, "loss": 0.7806, "step": 19512 }, { "epoch": 0.6892343557523765, "grad_norm": 1.6616666316986084, "learning_rate": 2.3259294112566524e-06, "loss": 0.7757, "step": 19513 }, { "epoch": 0.6892696775560844, "grad_norm": 1.6320300102233887, "learning_rate": 2.3254460977191264e-06, "loss": 0.7603, "step": 19514 }, { "epoch": 0.6893049993597923, "grad_norm": 2.040961503982544, "learning_rate": 2.3249628191856587e-06, "loss": 0.802, "step": 19515 }, { "epoch": 0.6893403211635002, "grad_norm": 1.7513186931610107, "learning_rate": 2.324479575662577e-06, "loss": 0.8189, "step": 19516 }, { "epoch": 0.6893756429672081, "grad_norm": 1.7854783535003662, "learning_rate": 2.323996367156202e-06, "loss": 0.796, "step": 19517 }, { "epoch": 0.689410964770916, "grad_norm": 1.6710505485534668, "learning_rate": 2.3235131936728594e-06, "loss": 0.7528, "step": 19518 }, { "epoch": 0.6894462865746239, "grad_norm": 1.6096065044403076, "learning_rate": 2.323030055218875e-06, "loss": 0.7649, "step": 19519 }, { "epoch": 0.6894816083783318, "grad_norm": 1.7244094610214233, "learning_rate": 2.3225469518005676e-06, "loss": 0.7289, "step": 19520 }, { "epoch": 0.6895169301820397, "grad_norm": 1.7069538831710815, "learning_rate": 2.3220638834242633e-06, "loss": 0.7964, "step": 19521 }, { "epoch": 0.6895522519857477, "grad_norm": 1.5902012586593628, "learning_rate": 2.3215808500962845e-06, "loss": 0.7826, "step": 19522 }, { "epoch": 0.6895875737894556, "grad_norm": 1.7726575136184692, "learning_rate": 2.3210978518229506e-06, "loss": 0.8175, "step": 19523 }, { "epoch": 0.6896228955931635, "grad_norm": 1.732452630996704, "learning_rate": 2.3206148886105846e-06, "loss": 0.7726, "step": 19524 }, { "epoch": 0.6896582173968714, "grad_norm": 1.7712842226028442, "learning_rate": 2.320131960465509e-06, "loss": 0.8036, "step": 19525 }, { "epoch": 0.6896935392005793, "grad_norm": 1.531706690788269, "learning_rate": 2.31964906739404e-06, "loss": 0.7533, "step": 19526 }, { "epoch": 0.6897288610042872, "grad_norm": 1.6583975553512573, "learning_rate": 2.3191662094025006e-06, "loss": 0.7795, "step": 19527 }, { "epoch": 0.6897641828079951, "grad_norm": 1.6742660999298096, "learning_rate": 2.3186833864972126e-06, "loss": 0.742, "step": 19528 }, { "epoch": 0.689799504611703, "grad_norm": 1.6254550218582153, "learning_rate": 2.31820059868449e-06, "loss": 0.7728, "step": 19529 }, { "epoch": 0.6898348264154109, "grad_norm": 1.8695482015609741, "learning_rate": 2.317717845970655e-06, "loss": 0.7617, "step": 19530 }, { "epoch": 0.6898701482191189, "grad_norm": 1.6191847324371338, "learning_rate": 2.317235128362023e-06, "loss": 0.7672, "step": 19531 }, { "epoch": 0.6899054700228268, "grad_norm": 1.6985061168670654, "learning_rate": 2.316752445864916e-06, "loss": 0.7428, "step": 19532 }, { "epoch": 0.6899407918265347, "grad_norm": 2.050445079803467, "learning_rate": 2.316269798485646e-06, "loss": 0.8076, "step": 19533 }, { "epoch": 0.6899761136302426, "grad_norm": 1.6917903423309326, "learning_rate": 2.3157871862305325e-06, "loss": 0.7626, "step": 19534 }, { "epoch": 0.6900114354339504, "grad_norm": 1.7433053255081177, "learning_rate": 2.315304609105894e-06, "loss": 0.8019, "step": 19535 }, { "epoch": 0.6900467572376583, "grad_norm": 1.811234951019287, "learning_rate": 2.3148220671180416e-06, "loss": 0.7595, "step": 19536 }, { "epoch": 0.6900820790413662, "grad_norm": 1.8219329118728638, "learning_rate": 2.314339560273294e-06, "loss": 0.7871, "step": 19537 }, { "epoch": 0.6901174008450741, "grad_norm": 1.9104270935058594, "learning_rate": 2.3138570885779663e-06, "loss": 0.7949, "step": 19538 }, { "epoch": 0.690152722648782, "grad_norm": 1.6901112794876099, "learning_rate": 2.313374652038371e-06, "loss": 0.7837, "step": 19539 }, { "epoch": 0.6901880444524899, "grad_norm": 1.826001763343811, "learning_rate": 2.3128922506608233e-06, "loss": 0.773, "step": 19540 }, { "epoch": 0.6902233662561978, "grad_norm": 1.727294921875, "learning_rate": 2.312409884451638e-06, "loss": 0.7892, "step": 19541 }, { "epoch": 0.6902586880599058, "grad_norm": 1.596463918685913, "learning_rate": 2.3119275534171253e-06, "loss": 0.7704, "step": 19542 }, { "epoch": 0.6902940098636137, "grad_norm": 1.7144747972488403, "learning_rate": 2.3114452575635994e-06, "loss": 0.7827, "step": 19543 }, { "epoch": 0.6903293316673216, "grad_norm": 1.5969326496124268, "learning_rate": 2.3109629968973747e-06, "loss": 0.7935, "step": 19544 }, { "epoch": 0.6903646534710295, "grad_norm": 1.5620415210723877, "learning_rate": 2.3104807714247586e-06, "loss": 0.7227, "step": 19545 }, { "epoch": 0.6903999752747374, "grad_norm": 1.6408276557922363, "learning_rate": 2.3099985811520646e-06, "loss": 0.7988, "step": 19546 }, { "epoch": 0.6904352970784453, "grad_norm": 1.7420551776885986, "learning_rate": 2.3095164260856055e-06, "loss": 0.7791, "step": 19547 }, { "epoch": 0.6904706188821532, "grad_norm": 1.651221752166748, "learning_rate": 2.3090343062316876e-06, "loss": 0.8159, "step": 19548 }, { "epoch": 0.6905059406858611, "grad_norm": 1.6579285860061646, "learning_rate": 2.3085522215966255e-06, "loss": 0.8093, "step": 19549 }, { "epoch": 0.690541262489569, "grad_norm": 1.865281105041504, "learning_rate": 2.308070172186723e-06, "loss": 0.7854, "step": 19550 }, { "epoch": 0.690576584293277, "grad_norm": 1.754343867301941, "learning_rate": 2.3075881580082947e-06, "loss": 0.7433, "step": 19551 }, { "epoch": 0.6906119060969849, "grad_norm": 1.8675647974014282, "learning_rate": 2.307106179067645e-06, "loss": 0.784, "step": 19552 }, { "epoch": 0.6906472279006928, "grad_norm": 1.6308478116989136, "learning_rate": 2.306624235371082e-06, "loss": 0.7341, "step": 19553 }, { "epoch": 0.6906825497044007, "grad_norm": 1.6953678131103516, "learning_rate": 2.306142326924918e-06, "loss": 0.7808, "step": 19554 }, { "epoch": 0.6907178715081086, "grad_norm": 1.7704799175262451, "learning_rate": 2.3056604537354544e-06, "loss": 0.7452, "step": 19555 }, { "epoch": 0.6907531933118165, "grad_norm": 1.6549750566482544, "learning_rate": 2.305178615809e-06, "loss": 0.7635, "step": 19556 }, { "epoch": 0.6907885151155244, "grad_norm": 1.6726915836334229, "learning_rate": 2.304696813151863e-06, "loss": 0.7646, "step": 19557 }, { "epoch": 0.6908238369192323, "grad_norm": 1.9488321542739868, "learning_rate": 2.3042150457703465e-06, "loss": 0.7729, "step": 19558 }, { "epoch": 0.6908591587229402, "grad_norm": 1.799386978149414, "learning_rate": 2.303733313670756e-06, "loss": 0.7794, "step": 19559 }, { "epoch": 0.6908944805266481, "grad_norm": 1.7483960390090942, "learning_rate": 2.303251616859397e-06, "loss": 0.7766, "step": 19560 }, { "epoch": 0.690929802330356, "grad_norm": 1.7089060544967651, "learning_rate": 2.3027699553425763e-06, "loss": 0.7823, "step": 19561 }, { "epoch": 0.6909651241340639, "grad_norm": 1.783199667930603, "learning_rate": 2.3022883291265936e-06, "loss": 0.7703, "step": 19562 }, { "epoch": 0.6910004459377718, "grad_norm": 1.7169008255004883, "learning_rate": 2.3018067382177533e-06, "loss": 0.8272, "step": 19563 }, { "epoch": 0.6910357677414797, "grad_norm": 4.995482921600342, "learning_rate": 2.301325182622362e-06, "loss": 0.794, "step": 19564 }, { "epoch": 0.6910710895451876, "grad_norm": 2.2303426265716553, "learning_rate": 2.300843662346717e-06, "loss": 0.7714, "step": 19565 }, { "epoch": 0.6911064113488955, "grad_norm": 1.769802451133728, "learning_rate": 2.300362177397123e-06, "loss": 0.7782, "step": 19566 }, { "epoch": 0.6911417331526034, "grad_norm": 1.6302313804626465, "learning_rate": 2.299880727779885e-06, "loss": 0.7781, "step": 19567 }, { "epoch": 0.6911770549563113, "grad_norm": 1.6642760038375854, "learning_rate": 2.2993993135012964e-06, "loss": 0.7405, "step": 19568 }, { "epoch": 0.6912123767600192, "grad_norm": 1.6357630491256714, "learning_rate": 2.2989179345676626e-06, "loss": 0.7844, "step": 19569 }, { "epoch": 0.6912476985637271, "grad_norm": 1.6538441181182861, "learning_rate": 2.2984365909852825e-06, "loss": 0.7772, "step": 19570 }, { "epoch": 0.691283020367435, "grad_norm": 1.8191864490509033, "learning_rate": 2.297955282760459e-06, "loss": 0.7633, "step": 19571 }, { "epoch": 0.691318342171143, "grad_norm": 1.9827221632003784, "learning_rate": 2.2974740098994864e-06, "loss": 0.7624, "step": 19572 }, { "epoch": 0.6913536639748509, "grad_norm": 1.6144025325775146, "learning_rate": 2.296992772408666e-06, "loss": 0.7709, "step": 19573 }, { "epoch": 0.6913889857785588, "grad_norm": 1.9182409048080444, "learning_rate": 2.296511570294298e-06, "loss": 0.7754, "step": 19574 }, { "epoch": 0.6914243075822667, "grad_norm": 1.8711378574371338, "learning_rate": 2.2960304035626768e-06, "loss": 0.7671, "step": 19575 }, { "epoch": 0.6914596293859746, "grad_norm": 1.6652029752731323, "learning_rate": 2.295549272220101e-06, "loss": 0.7536, "step": 19576 }, { "epoch": 0.6914949511896825, "grad_norm": 1.691379427909851, "learning_rate": 2.29506817627287e-06, "loss": 0.7848, "step": 19577 }, { "epoch": 0.6915302729933904, "grad_norm": 1.7090680599212646, "learning_rate": 2.2945871157272764e-06, "loss": 0.7765, "step": 19578 }, { "epoch": 0.6915655947970983, "grad_norm": 1.763508677482605, "learning_rate": 2.294106090589618e-06, "loss": 0.7805, "step": 19579 }, { "epoch": 0.6916009166008062, "grad_norm": 1.8293051719665527, "learning_rate": 2.293625100866192e-06, "loss": 0.7504, "step": 19580 }, { "epoch": 0.6916362384045142, "grad_norm": 1.7198153734207153, "learning_rate": 2.2931441465632904e-06, "loss": 0.7987, "step": 19581 }, { "epoch": 0.6916715602082221, "grad_norm": 1.6362619400024414, "learning_rate": 2.292663227687209e-06, "loss": 0.7403, "step": 19582 }, { "epoch": 0.69170688201193, "grad_norm": 2.042386293411255, "learning_rate": 2.2921823442442447e-06, "loss": 0.7505, "step": 19583 }, { "epoch": 0.6917422038156379, "grad_norm": 1.6909425258636475, "learning_rate": 2.291701496240687e-06, "loss": 0.755, "step": 19584 }, { "epoch": 0.6917775256193458, "grad_norm": 1.6128270626068115, "learning_rate": 2.291220683682831e-06, "loss": 0.7592, "step": 19585 }, { "epoch": 0.6918128474230537, "grad_norm": 1.8078111410140991, "learning_rate": 2.290739906576971e-06, "loss": 0.7807, "step": 19586 }, { "epoch": 0.6918481692267615, "grad_norm": 1.8515040874481201, "learning_rate": 2.290259164929398e-06, "loss": 0.7589, "step": 19587 }, { "epoch": 0.6918834910304694, "grad_norm": 1.5543532371520996, "learning_rate": 2.2897784587464017e-06, "loss": 0.7778, "step": 19588 }, { "epoch": 0.6919188128341773, "grad_norm": 1.5779354572296143, "learning_rate": 2.289297788034276e-06, "loss": 0.7789, "step": 19589 }, { "epoch": 0.6919541346378852, "grad_norm": 1.752951741218567, "learning_rate": 2.2888171527993124e-06, "loss": 0.7536, "step": 19590 }, { "epoch": 0.6919894564415932, "grad_norm": 1.5959872007369995, "learning_rate": 2.288336553047799e-06, "loss": 0.7444, "step": 19591 }, { "epoch": 0.6920247782453011, "grad_norm": 1.5811593532562256, "learning_rate": 2.287855988786027e-06, "loss": 0.7439, "step": 19592 }, { "epoch": 0.692060100049009, "grad_norm": 1.6035789251327515, "learning_rate": 2.2873754600202875e-06, "loss": 0.7853, "step": 19593 }, { "epoch": 0.6920954218527169, "grad_norm": 1.6047568321228027, "learning_rate": 2.286894966756866e-06, "loss": 0.7468, "step": 19594 }, { "epoch": 0.6921307436564248, "grad_norm": 1.6943995952606201, "learning_rate": 2.286414509002054e-06, "loss": 0.7269, "step": 19595 }, { "epoch": 0.6921660654601327, "grad_norm": 2.7116541862487793, "learning_rate": 2.28593408676214e-06, "loss": 0.7977, "step": 19596 }, { "epoch": 0.6922013872638406, "grad_norm": 1.6629064083099365, "learning_rate": 2.285453700043409e-06, "loss": 0.7764, "step": 19597 }, { "epoch": 0.6922367090675485, "grad_norm": 1.8529720306396484, "learning_rate": 2.28497334885215e-06, "loss": 0.765, "step": 19598 }, { "epoch": 0.6922720308712564, "grad_norm": 2.6729114055633545, "learning_rate": 2.2844930331946502e-06, "loss": 0.7541, "step": 19599 }, { "epoch": 0.6923073526749643, "grad_norm": 1.7037973403930664, "learning_rate": 2.284012753077194e-06, "loss": 0.7928, "step": 19600 }, { "epoch": 0.6923426744786723, "grad_norm": 1.6077035665512085, "learning_rate": 2.2835325085060683e-06, "loss": 0.7593, "step": 19601 }, { "epoch": 0.6923779962823802, "grad_norm": 1.6785615682601929, "learning_rate": 2.2830522994875603e-06, "loss": 0.8142, "step": 19602 }, { "epoch": 0.6924133180860881, "grad_norm": 1.9218382835388184, "learning_rate": 2.282572126027951e-06, "loss": 0.7489, "step": 19603 }, { "epoch": 0.692448639889796, "grad_norm": 1.8174232244491577, "learning_rate": 2.282091988133529e-06, "loss": 0.7769, "step": 19604 }, { "epoch": 0.6924839616935039, "grad_norm": 2.1704976558685303, "learning_rate": 2.2816118858105747e-06, "loss": 0.8037, "step": 19605 }, { "epoch": 0.6925192834972118, "grad_norm": 1.6874171495437622, "learning_rate": 2.281131819065374e-06, "loss": 0.7617, "step": 19606 }, { "epoch": 0.6925546053009197, "grad_norm": 1.6825530529022217, "learning_rate": 2.280651787904208e-06, "loss": 0.7934, "step": 19607 }, { "epoch": 0.6925899271046276, "grad_norm": 2.060471296310425, "learning_rate": 2.2801717923333595e-06, "loss": 0.7838, "step": 19608 }, { "epoch": 0.6926252489083355, "grad_norm": 1.9201582670211792, "learning_rate": 2.279691832359113e-06, "loss": 0.7685, "step": 19609 }, { "epoch": 0.6926605707120435, "grad_norm": 1.820533275604248, "learning_rate": 2.2792119079877474e-06, "loss": 0.7583, "step": 19610 }, { "epoch": 0.6926958925157514, "grad_norm": 2.087221384048462, "learning_rate": 2.2787320192255446e-06, "loss": 0.7904, "step": 19611 }, { "epoch": 0.6927312143194593, "grad_norm": 1.5094960927963257, "learning_rate": 2.278252166078787e-06, "loss": 0.7722, "step": 19612 }, { "epoch": 0.6927665361231671, "grad_norm": 1.7797112464904785, "learning_rate": 2.277772348553752e-06, "loss": 0.739, "step": 19613 }, { "epoch": 0.692801857926875, "grad_norm": 1.606798529624939, "learning_rate": 2.277292566656721e-06, "loss": 0.7864, "step": 19614 }, { "epoch": 0.6928371797305829, "grad_norm": 1.5975878238677979, "learning_rate": 2.2768128203939744e-06, "loss": 0.7679, "step": 19615 }, { "epoch": 0.6928725015342908, "grad_norm": 0.9196064472198486, "learning_rate": 2.276333109771788e-06, "loss": 0.5623, "step": 19616 }, { "epoch": 0.6929078233379987, "grad_norm": 1.6161079406738281, "learning_rate": 2.2758534347964417e-06, "loss": 0.7778, "step": 19617 }, { "epoch": 0.6929431451417066, "grad_norm": 1.6866028308868408, "learning_rate": 2.2753737954742137e-06, "loss": 0.77, "step": 19618 }, { "epoch": 0.6929784669454145, "grad_norm": 1.4441946744918823, "learning_rate": 2.2748941918113837e-06, "loss": 0.7881, "step": 19619 }, { "epoch": 0.6930137887491225, "grad_norm": 1.7618292570114136, "learning_rate": 2.274414623814224e-06, "loss": 0.7918, "step": 19620 }, { "epoch": 0.6930491105528304, "grad_norm": 1.5980546474456787, "learning_rate": 2.2739350914890127e-06, "loss": 0.7611, "step": 19621 }, { "epoch": 0.6930844323565383, "grad_norm": 1.6546406745910645, "learning_rate": 2.2734555948420295e-06, "loss": 0.7279, "step": 19622 }, { "epoch": 0.6931197541602462, "grad_norm": 2.1780078411102295, "learning_rate": 2.2729761338795465e-06, "loss": 0.7586, "step": 19623 }, { "epoch": 0.6931550759639541, "grad_norm": 1.776987075805664, "learning_rate": 2.272496708607837e-06, "loss": 0.7881, "step": 19624 }, { "epoch": 0.693190397767662, "grad_norm": 2.0246028900146484, "learning_rate": 2.2720173190331802e-06, "loss": 0.7558, "step": 19625 }, { "epoch": 0.6932257195713699, "grad_norm": 1.7750575542449951, "learning_rate": 2.2715379651618453e-06, "loss": 0.8056, "step": 19626 }, { "epoch": 0.6932610413750778, "grad_norm": 1.6472887992858887, "learning_rate": 2.2710586470001095e-06, "loss": 0.7726, "step": 19627 }, { "epoch": 0.6932963631787857, "grad_norm": 1.8888729810714722, "learning_rate": 2.2705793645542446e-06, "loss": 0.7633, "step": 19628 }, { "epoch": 0.6933316849824936, "grad_norm": 1.9111876487731934, "learning_rate": 2.270100117830526e-06, "loss": 0.7756, "step": 19629 }, { "epoch": 0.6933670067862016, "grad_norm": 1.748381495475769, "learning_rate": 2.269620906835222e-06, "loss": 0.7669, "step": 19630 }, { "epoch": 0.6934023285899095, "grad_norm": 1.582706093788147, "learning_rate": 2.2691417315746063e-06, "loss": 0.7881, "step": 19631 }, { "epoch": 0.6934376503936174, "grad_norm": 1.895943522453308, "learning_rate": 2.268662592054952e-06, "loss": 0.8228, "step": 19632 }, { "epoch": 0.6934729721973253, "grad_norm": 1.993125557899475, "learning_rate": 2.2681834882825265e-06, "loss": 0.7688, "step": 19633 }, { "epoch": 0.6935082940010332, "grad_norm": 1.700640082359314, "learning_rate": 2.2677044202636024e-06, "loss": 0.7409, "step": 19634 }, { "epoch": 0.6935436158047411, "grad_norm": 1.6230199337005615, "learning_rate": 2.26722538800445e-06, "loss": 0.776, "step": 19635 }, { "epoch": 0.693578937608449, "grad_norm": 1.59468674659729, "learning_rate": 2.2667463915113374e-06, "loss": 0.7808, "step": 19636 }, { "epoch": 0.6936142594121569, "grad_norm": 1.636825680732727, "learning_rate": 2.2662674307905338e-06, "loss": 0.7742, "step": 19637 }, { "epoch": 0.6936495812158648, "grad_norm": 1.7406455278396606, "learning_rate": 2.26578850584831e-06, "loss": 0.762, "step": 19638 }, { "epoch": 0.6936849030195726, "grad_norm": 1.6493979692459106, "learning_rate": 2.26530961669093e-06, "loss": 0.7816, "step": 19639 }, { "epoch": 0.6937202248232806, "grad_norm": 1.8077735900878906, "learning_rate": 2.264830763324664e-06, "loss": 0.8017, "step": 19640 }, { "epoch": 0.6937555466269885, "grad_norm": 0.9958282709121704, "learning_rate": 2.2643519457557807e-06, "loss": 0.5476, "step": 19641 }, { "epoch": 0.6937908684306964, "grad_norm": 1.663546085357666, "learning_rate": 2.2638731639905447e-06, "loss": 0.8162, "step": 19642 }, { "epoch": 0.6938261902344043, "grad_norm": 1.770150899887085, "learning_rate": 2.2633944180352206e-06, "loss": 0.8058, "step": 19643 }, { "epoch": 0.6938615120381122, "grad_norm": 1.802315354347229, "learning_rate": 2.262915707896076e-06, "loss": 0.7568, "step": 19644 }, { "epoch": 0.6938968338418201, "grad_norm": 1.5620415210723877, "learning_rate": 2.2624370335793783e-06, "loss": 0.7797, "step": 19645 }, { "epoch": 0.693932155645528, "grad_norm": 1.696192741394043, "learning_rate": 2.261958395091388e-06, "loss": 0.7896, "step": 19646 }, { "epoch": 0.6939674774492359, "grad_norm": 1.540405511856079, "learning_rate": 2.261479792438371e-06, "loss": 0.7575, "step": 19647 }, { "epoch": 0.6940027992529438, "grad_norm": 1.9770700931549072, "learning_rate": 2.2610012256265945e-06, "loss": 0.8121, "step": 19648 }, { "epoch": 0.6940381210566517, "grad_norm": 1.6288676261901855, "learning_rate": 2.2605226946623164e-06, "loss": 0.7566, "step": 19649 }, { "epoch": 0.6940734428603597, "grad_norm": 0.8989657759666443, "learning_rate": 2.260044199551803e-06, "loss": 0.6002, "step": 19650 }, { "epoch": 0.6941087646640676, "grad_norm": 1.6550633907318115, "learning_rate": 2.2595657403013173e-06, "loss": 0.7606, "step": 19651 }, { "epoch": 0.6941440864677755, "grad_norm": 1.4947479963302612, "learning_rate": 2.2590873169171184e-06, "loss": 0.7622, "step": 19652 }, { "epoch": 0.6941794082714834, "grad_norm": 1.6910524368286133, "learning_rate": 2.2586089294054695e-06, "loss": 0.7773, "step": 19653 }, { "epoch": 0.6942147300751913, "grad_norm": 1.8321702480316162, "learning_rate": 2.2581305777726337e-06, "loss": 0.7559, "step": 19654 }, { "epoch": 0.6942500518788992, "grad_norm": 1.6101927757263184, "learning_rate": 2.2576522620248675e-06, "loss": 0.7675, "step": 19655 }, { "epoch": 0.6942853736826071, "grad_norm": 1.8623695373535156, "learning_rate": 2.2571739821684325e-06, "loss": 0.8092, "step": 19656 }, { "epoch": 0.694320695486315, "grad_norm": 3.3036603927612305, "learning_rate": 2.256695738209591e-06, "loss": 0.7735, "step": 19657 }, { "epoch": 0.6943560172900229, "grad_norm": 2.1971275806427, "learning_rate": 2.2562175301545987e-06, "loss": 0.7595, "step": 19658 }, { "epoch": 0.6943913390937309, "grad_norm": 1.6484966278076172, "learning_rate": 2.2557393580097147e-06, "loss": 0.7511, "step": 19659 }, { "epoch": 0.6944266608974388, "grad_norm": 1.760454535484314, "learning_rate": 2.2552612217812004e-06, "loss": 0.7789, "step": 19660 }, { "epoch": 0.6944619827011467, "grad_norm": 1.5838286876678467, "learning_rate": 2.2547831214753113e-06, "loss": 0.7893, "step": 19661 }, { "epoch": 0.6944973045048546, "grad_norm": 1.8463228940963745, "learning_rate": 2.2543050570983026e-06, "loss": 0.7354, "step": 19662 }, { "epoch": 0.6945326263085625, "grad_norm": 1.7641123533248901, "learning_rate": 2.2538270286564333e-06, "loss": 0.7853, "step": 19663 }, { "epoch": 0.6945679481122704, "grad_norm": 1.7465107440948486, "learning_rate": 2.2533490361559617e-06, "loss": 0.7673, "step": 19664 }, { "epoch": 0.6946032699159782, "grad_norm": 1.6541579961776733, "learning_rate": 2.2528710796031394e-06, "loss": 0.8284, "step": 19665 }, { "epoch": 0.6946385917196861, "grad_norm": 1.5604634284973145, "learning_rate": 2.2523931590042242e-06, "loss": 0.7495, "step": 19666 }, { "epoch": 0.694673913523394, "grad_norm": 1.5376161336898804, "learning_rate": 2.251915274365473e-06, "loss": 0.7865, "step": 19667 }, { "epoch": 0.6947092353271019, "grad_norm": 1.7135546207427979, "learning_rate": 2.251437425693136e-06, "loss": 0.7968, "step": 19668 }, { "epoch": 0.6947445571308098, "grad_norm": 1.6828142404556274, "learning_rate": 2.250959612993469e-06, "loss": 0.7617, "step": 19669 }, { "epoch": 0.6947798789345178, "grad_norm": 1.7148562669754028, "learning_rate": 2.2504818362727277e-06, "loss": 0.7794, "step": 19670 }, { "epoch": 0.6948152007382257, "grad_norm": 1.619266390800476, "learning_rate": 2.2500040955371614e-06, "loss": 0.78, "step": 19671 }, { "epoch": 0.6948505225419336, "grad_norm": 1.620631217956543, "learning_rate": 2.249526390793025e-06, "loss": 0.7819, "step": 19672 }, { "epoch": 0.6948858443456415, "grad_norm": 1.6230428218841553, "learning_rate": 2.249048722046572e-06, "loss": 0.7342, "step": 19673 }, { "epoch": 0.6949211661493494, "grad_norm": 1.6666218042373657, "learning_rate": 2.24857108930405e-06, "loss": 0.768, "step": 19674 }, { "epoch": 0.6949564879530573, "grad_norm": 1.7426947355270386, "learning_rate": 2.2480934925717123e-06, "loss": 0.8178, "step": 19675 }, { "epoch": 0.6949918097567652, "grad_norm": 1.6104326248168945, "learning_rate": 2.24761593185581e-06, "loss": 0.7781, "step": 19676 }, { "epoch": 0.6950271315604731, "grad_norm": 1.8369145393371582, "learning_rate": 2.2471384071625945e-06, "loss": 0.8102, "step": 19677 }, { "epoch": 0.695062453364181, "grad_norm": 1.6855281591415405, "learning_rate": 2.2466609184983145e-06, "loss": 0.7631, "step": 19678 }, { "epoch": 0.695097775167889, "grad_norm": 1.80111825466156, "learning_rate": 2.2461834658692166e-06, "loss": 0.8124, "step": 19679 }, { "epoch": 0.6951330969715969, "grad_norm": 1.5673482418060303, "learning_rate": 2.245706049281554e-06, "loss": 0.7642, "step": 19680 }, { "epoch": 0.6951684187753048, "grad_norm": 1.7459921836853027, "learning_rate": 2.245228668741571e-06, "loss": 0.7721, "step": 19681 }, { "epoch": 0.6952037405790127, "grad_norm": 1.63055419921875, "learning_rate": 2.244751324255517e-06, "loss": 0.7549, "step": 19682 }, { "epoch": 0.6952390623827206, "grad_norm": 2.005722761154175, "learning_rate": 2.244274015829642e-06, "loss": 0.7966, "step": 19683 }, { "epoch": 0.6952743841864285, "grad_norm": 1.6467509269714355, "learning_rate": 2.2437967434701886e-06, "loss": 0.7459, "step": 19684 }, { "epoch": 0.6953097059901364, "grad_norm": 1.7391160726547241, "learning_rate": 2.243319507183405e-06, "loss": 0.7999, "step": 19685 }, { "epoch": 0.6953450277938443, "grad_norm": 1.7629197835922241, "learning_rate": 2.242842306975538e-06, "loss": 0.7884, "step": 19686 }, { "epoch": 0.6953803495975522, "grad_norm": 1.952009916305542, "learning_rate": 2.242365142852834e-06, "loss": 0.7567, "step": 19687 }, { "epoch": 0.6954156714012601, "grad_norm": 1.756496548652649, "learning_rate": 2.241888014821535e-06, "loss": 0.7853, "step": 19688 }, { "epoch": 0.6954509932049681, "grad_norm": 1.6675900220870972, "learning_rate": 2.2414109228878873e-06, "loss": 0.7461, "step": 19689 }, { "epoch": 0.695486315008676, "grad_norm": 1.874929428100586, "learning_rate": 2.2409338670581367e-06, "loss": 0.8042, "step": 19690 }, { "epoch": 0.6955216368123838, "grad_norm": 1.9808707237243652, "learning_rate": 2.240456847338523e-06, "loss": 0.7993, "step": 19691 }, { "epoch": 0.6955569586160917, "grad_norm": 1.6861461400985718, "learning_rate": 2.2399798637352916e-06, "loss": 0.7794, "step": 19692 }, { "epoch": 0.6955922804197996, "grad_norm": 1.7287834882736206, "learning_rate": 2.2395029162546866e-06, "loss": 0.7696, "step": 19693 }, { "epoch": 0.6956276022235075, "grad_norm": 1.5617444515228271, "learning_rate": 2.239026004902947e-06, "loss": 0.7432, "step": 19694 }, { "epoch": 0.6956629240272154, "grad_norm": 1.7844922542572021, "learning_rate": 2.2385491296863153e-06, "loss": 0.77, "step": 19695 }, { "epoch": 0.6956982458309233, "grad_norm": 1.6290507316589355, "learning_rate": 2.238072290611036e-06, "loss": 0.7333, "step": 19696 }, { "epoch": 0.6957335676346312, "grad_norm": 1.5819575786590576, "learning_rate": 2.2375954876833466e-06, "loss": 0.7836, "step": 19697 }, { "epoch": 0.6957688894383391, "grad_norm": 1.960059642791748, "learning_rate": 2.2371187209094875e-06, "loss": 0.7857, "step": 19698 }, { "epoch": 0.695804211242047, "grad_norm": 1.7071070671081543, "learning_rate": 2.2366419902956983e-06, "loss": 0.7495, "step": 19699 }, { "epoch": 0.695839533045755, "grad_norm": 1.716719150543213, "learning_rate": 2.2361652958482216e-06, "loss": 0.7368, "step": 19700 }, { "epoch": 0.6958748548494629, "grad_norm": 1.7542798519134521, "learning_rate": 2.2356886375732916e-06, "loss": 0.763, "step": 19701 }, { "epoch": 0.6959101766531708, "grad_norm": 1.5594638586044312, "learning_rate": 2.2352120154771496e-06, "loss": 0.7674, "step": 19702 }, { "epoch": 0.6959454984568787, "grad_norm": 1.5852526426315308, "learning_rate": 2.2347354295660345e-06, "loss": 0.7819, "step": 19703 }, { "epoch": 0.6959808202605866, "grad_norm": 1.64015531539917, "learning_rate": 2.2342588798461808e-06, "loss": 0.8065, "step": 19704 }, { "epoch": 0.6960161420642945, "grad_norm": 1.5726572275161743, "learning_rate": 2.233782366323827e-06, "loss": 0.7738, "step": 19705 }, { "epoch": 0.6960514638680024, "grad_norm": 1.8123598098754883, "learning_rate": 2.2333058890052105e-06, "loss": 0.78, "step": 19706 }, { "epoch": 0.6960867856717103, "grad_norm": 2.005286693572998, "learning_rate": 2.2328294478965656e-06, "loss": 0.7703, "step": 19707 }, { "epoch": 0.6961221074754183, "grad_norm": 1.623975396156311, "learning_rate": 2.2323530430041285e-06, "loss": 0.7932, "step": 19708 }, { "epoch": 0.6961574292791262, "grad_norm": 1.8616808652877808, "learning_rate": 2.2318766743341363e-06, "loss": 0.796, "step": 19709 }, { "epoch": 0.6961927510828341, "grad_norm": 1.6008498668670654, "learning_rate": 2.23140034189282e-06, "loss": 0.7911, "step": 19710 }, { "epoch": 0.696228072886542, "grad_norm": 1.4873720407485962, "learning_rate": 2.2309240456864156e-06, "loss": 0.7662, "step": 19711 }, { "epoch": 0.6962633946902499, "grad_norm": 1.7277147769927979, "learning_rate": 2.2304477857211586e-06, "loss": 0.7716, "step": 19712 }, { "epoch": 0.6962987164939578, "grad_norm": 1.8876291513442993, "learning_rate": 2.2299715620032787e-06, "loss": 0.7812, "step": 19713 }, { "epoch": 0.6963340382976657, "grad_norm": 1.7532793283462524, "learning_rate": 2.2294953745390098e-06, "loss": 0.7936, "step": 19714 }, { "epoch": 0.6963693601013736, "grad_norm": 1.8163282871246338, "learning_rate": 2.229019223334587e-06, "loss": 0.7573, "step": 19715 }, { "epoch": 0.6964046819050815, "grad_norm": 1.779972791671753, "learning_rate": 2.2285431083962395e-06, "loss": 0.7795, "step": 19716 }, { "epoch": 0.6964400037087893, "grad_norm": 1.7596185207366943, "learning_rate": 2.2280670297301967e-06, "loss": 0.7694, "step": 19717 }, { "epoch": 0.6964753255124972, "grad_norm": 1.6992548704147339, "learning_rate": 2.2275909873426923e-06, "loss": 0.7873, "step": 19718 }, { "epoch": 0.6965106473162052, "grad_norm": 1.638152837753296, "learning_rate": 2.2271149812399567e-06, "loss": 0.7795, "step": 19719 }, { "epoch": 0.6965459691199131, "grad_norm": 1.6590838432312012, "learning_rate": 2.2266390114282177e-06, "loss": 0.7579, "step": 19720 }, { "epoch": 0.696581290923621, "grad_norm": 1.6936384439468384, "learning_rate": 2.226163077913706e-06, "loss": 0.764, "step": 19721 }, { "epoch": 0.6966166127273289, "grad_norm": 1.5159525871276855, "learning_rate": 2.2256871807026526e-06, "loss": 0.7419, "step": 19722 }, { "epoch": 0.6966519345310368, "grad_norm": 1.5655250549316406, "learning_rate": 2.2252113198012813e-06, "loss": 0.741, "step": 19723 }, { "epoch": 0.6966872563347447, "grad_norm": 1.7117412090301514, "learning_rate": 2.2247354952158233e-06, "loss": 0.7595, "step": 19724 }, { "epoch": 0.6967225781384526, "grad_norm": 1.5724588632583618, "learning_rate": 2.2242597069525074e-06, "loss": 0.7669, "step": 19725 }, { "epoch": 0.6967578999421605, "grad_norm": 2.1512296199798584, "learning_rate": 2.223783955017556e-06, "loss": 0.8016, "step": 19726 }, { "epoch": 0.6967932217458684, "grad_norm": 1.7748042345046997, "learning_rate": 2.223308239417199e-06, "loss": 0.7556, "step": 19727 }, { "epoch": 0.6968285435495764, "grad_norm": 2.2966129779815674, "learning_rate": 2.2228325601576634e-06, "loss": 0.7861, "step": 19728 }, { "epoch": 0.6968638653532843, "grad_norm": 1.6570138931274414, "learning_rate": 2.2223569172451716e-06, "loss": 0.7205, "step": 19729 }, { "epoch": 0.6968991871569922, "grad_norm": 2.204906940460205, "learning_rate": 2.2218813106859506e-06, "loss": 0.8089, "step": 19730 }, { "epoch": 0.6969345089607001, "grad_norm": 1.5987695455551147, "learning_rate": 2.2214057404862262e-06, "loss": 0.7594, "step": 19731 }, { "epoch": 0.696969830764408, "grad_norm": 2.1940619945526123, "learning_rate": 2.2209302066522186e-06, "loss": 0.7891, "step": 19732 }, { "epoch": 0.6970051525681159, "grad_norm": 0.8858793377876282, "learning_rate": 2.220454709190157e-06, "loss": 0.5663, "step": 19733 }, { "epoch": 0.6970404743718238, "grad_norm": 2.0258963108062744, "learning_rate": 2.2199792481062595e-06, "loss": 0.7583, "step": 19734 }, { "epoch": 0.6970757961755317, "grad_norm": 2.854288101196289, "learning_rate": 2.219503823406753e-06, "loss": 0.7829, "step": 19735 }, { "epoch": 0.6971111179792396, "grad_norm": 1.5556741952896118, "learning_rate": 2.2190284350978554e-06, "loss": 0.7558, "step": 19736 }, { "epoch": 0.6971464397829475, "grad_norm": 1.690647840499878, "learning_rate": 2.218553083185791e-06, "loss": 0.77, "step": 19737 }, { "epoch": 0.6971817615866555, "grad_norm": 1.887495994567871, "learning_rate": 2.2180777676767835e-06, "loss": 0.777, "step": 19738 }, { "epoch": 0.6972170833903634, "grad_norm": 1.737282156944275, "learning_rate": 2.2176024885770487e-06, "loss": 0.7458, "step": 19739 }, { "epoch": 0.6972524051940713, "grad_norm": 1.545865535736084, "learning_rate": 2.2171272458928095e-06, "loss": 0.7807, "step": 19740 }, { "epoch": 0.6972877269977792, "grad_norm": 1.698395013809204, "learning_rate": 2.2166520396302875e-06, "loss": 0.7712, "step": 19741 }, { "epoch": 0.6973230488014871, "grad_norm": 1.6936652660369873, "learning_rate": 2.2161768697956993e-06, "loss": 0.7473, "step": 19742 }, { "epoch": 0.6973583706051949, "grad_norm": 2.0859463214874268, "learning_rate": 2.2157017363952643e-06, "loss": 0.7567, "step": 19743 }, { "epoch": 0.6973936924089028, "grad_norm": 1.7435250282287598, "learning_rate": 2.215226639435202e-06, "loss": 0.7778, "step": 19744 }, { "epoch": 0.6974290142126107, "grad_norm": 1.9456536769866943, "learning_rate": 2.2147515789217318e-06, "loss": 0.7883, "step": 19745 }, { "epoch": 0.6974643360163186, "grad_norm": 2.114363193511963, "learning_rate": 2.2142765548610674e-06, "loss": 0.7515, "step": 19746 }, { "epoch": 0.6974996578200265, "grad_norm": 1.7096786499023438, "learning_rate": 2.2138015672594277e-06, "loss": 0.8058, "step": 19747 }, { "epoch": 0.6975349796237345, "grad_norm": 1.7702304124832153, "learning_rate": 2.213326616123031e-06, "loss": 0.774, "step": 19748 }, { "epoch": 0.6975703014274424, "grad_norm": 1.7044061422348022, "learning_rate": 2.2128517014580903e-06, "loss": 0.7621, "step": 19749 }, { "epoch": 0.6976056232311503, "grad_norm": 1.8889895677566528, "learning_rate": 2.212376823270822e-06, "loss": 0.7927, "step": 19750 }, { "epoch": 0.6976409450348582, "grad_norm": 1.6626205444335938, "learning_rate": 2.211901981567444e-06, "loss": 0.7871, "step": 19751 }, { "epoch": 0.6976762668385661, "grad_norm": 1.6044782400131226, "learning_rate": 2.211427176354169e-06, "loss": 0.7635, "step": 19752 }, { "epoch": 0.697711588642274, "grad_norm": 1.7009878158569336, "learning_rate": 2.2109524076372084e-06, "loss": 0.7636, "step": 19753 }, { "epoch": 0.6977469104459819, "grad_norm": 1.7255260944366455, "learning_rate": 2.210477675422779e-06, "loss": 0.7443, "step": 19754 }, { "epoch": 0.6977822322496898, "grad_norm": 1.6197456121444702, "learning_rate": 2.210002979717095e-06, "loss": 0.7688, "step": 19755 }, { "epoch": 0.6978175540533977, "grad_norm": 1.698120355606079, "learning_rate": 2.209528320526366e-06, "loss": 0.7708, "step": 19756 }, { "epoch": 0.6978528758571056, "grad_norm": 1.7327560186386108, "learning_rate": 2.2090536978568054e-06, "loss": 0.7418, "step": 19757 }, { "epoch": 0.6978881976608136, "grad_norm": 0.9615014791488647, "learning_rate": 2.2085791117146278e-06, "loss": 0.5705, "step": 19758 }, { "epoch": 0.6979235194645215, "grad_norm": 1.8269938230514526, "learning_rate": 2.20810456210604e-06, "loss": 0.7576, "step": 19759 }, { "epoch": 0.6979588412682294, "grad_norm": 1.5131418704986572, "learning_rate": 2.2076300490372545e-06, "loss": 0.7767, "step": 19760 }, { "epoch": 0.6979941630719373, "grad_norm": 1.8211066722869873, "learning_rate": 2.207155572514484e-06, "loss": 0.8131, "step": 19761 }, { "epoch": 0.6980294848756452, "grad_norm": 1.6828042268753052, "learning_rate": 2.2066811325439346e-06, "loss": 0.778, "step": 19762 }, { "epoch": 0.6980648066793531, "grad_norm": 1.5575071573257446, "learning_rate": 2.2062067291318173e-06, "loss": 0.7388, "step": 19763 }, { "epoch": 0.698100128483061, "grad_norm": 2.063171625137329, "learning_rate": 2.2057323622843437e-06, "loss": 0.783, "step": 19764 }, { "epoch": 0.6981354502867689, "grad_norm": 1.6158164739608765, "learning_rate": 2.205258032007717e-06, "loss": 0.761, "step": 19765 }, { "epoch": 0.6981707720904768, "grad_norm": 1.743194818496704, "learning_rate": 2.204783738308148e-06, "loss": 0.7699, "step": 19766 }, { "epoch": 0.6982060938941848, "grad_norm": 1.6520086526870728, "learning_rate": 2.204309481191846e-06, "loss": 0.7748, "step": 19767 }, { "epoch": 0.6982414156978927, "grad_norm": 1.7445168495178223, "learning_rate": 2.2038352606650144e-06, "loss": 0.7677, "step": 19768 }, { "epoch": 0.6982767375016005, "grad_norm": 1.7227479219436646, "learning_rate": 2.2033610767338602e-06, "loss": 0.7846, "step": 19769 }, { "epoch": 0.6983120593053084, "grad_norm": 1.5388208627700806, "learning_rate": 2.2028869294045934e-06, "loss": 0.7929, "step": 19770 }, { "epoch": 0.6983473811090163, "grad_norm": 1.973363995552063, "learning_rate": 2.2024128186834167e-06, "loss": 0.7909, "step": 19771 }, { "epoch": 0.6983827029127242, "grad_norm": 1.7892857789993286, "learning_rate": 2.201938744576533e-06, "loss": 0.7637, "step": 19772 }, { "epoch": 0.6984180247164321, "grad_norm": 1.592815637588501, "learning_rate": 2.201464707090149e-06, "loss": 0.7628, "step": 19773 }, { "epoch": 0.69845334652014, "grad_norm": 1.7079277038574219, "learning_rate": 2.20099070623047e-06, "loss": 0.7815, "step": 19774 }, { "epoch": 0.6984886683238479, "grad_norm": 1.6640132665634155, "learning_rate": 2.2005167420036976e-06, "loss": 0.7771, "step": 19775 }, { "epoch": 0.6985239901275558, "grad_norm": 1.596321940422058, "learning_rate": 2.2000428144160357e-06, "loss": 0.7617, "step": 19776 }, { "epoch": 0.6985593119312637, "grad_norm": 2.468458414077759, "learning_rate": 2.199568923473689e-06, "loss": 0.7841, "step": 19777 }, { "epoch": 0.6985946337349717, "grad_norm": 1.6598814725875854, "learning_rate": 2.1990950691828554e-06, "loss": 0.7421, "step": 19778 }, { "epoch": 0.6986299555386796, "grad_norm": 2.053518295288086, "learning_rate": 2.1986212515497398e-06, "loss": 0.7616, "step": 19779 }, { "epoch": 0.6986652773423875, "grad_norm": 1.516042709350586, "learning_rate": 2.198147470580544e-06, "loss": 0.7657, "step": 19780 }, { "epoch": 0.6987005991460954, "grad_norm": 1.5966264009475708, "learning_rate": 2.1976737262814656e-06, "loss": 0.7548, "step": 19781 }, { "epoch": 0.6987359209498033, "grad_norm": 1.6908866167068481, "learning_rate": 2.1972000186587068e-06, "loss": 0.799, "step": 19782 }, { "epoch": 0.6987712427535112, "grad_norm": 1.5334181785583496, "learning_rate": 2.196726347718469e-06, "loss": 0.7637, "step": 19783 }, { "epoch": 0.6988065645572191, "grad_norm": 1.653217077255249, "learning_rate": 2.196252713466948e-06, "loss": 0.7889, "step": 19784 }, { "epoch": 0.698841886360927, "grad_norm": 1.5872688293457031, "learning_rate": 2.195779115910345e-06, "loss": 0.7577, "step": 19785 }, { "epoch": 0.6988772081646349, "grad_norm": 1.590113639831543, "learning_rate": 2.1953055550548593e-06, "loss": 0.7784, "step": 19786 }, { "epoch": 0.6989125299683429, "grad_norm": 1.8441195487976074, "learning_rate": 2.194832030906685e-06, "loss": 0.7912, "step": 19787 }, { "epoch": 0.6989478517720508, "grad_norm": 1.707305908203125, "learning_rate": 2.1943585434720216e-06, "loss": 0.7835, "step": 19788 }, { "epoch": 0.6989831735757587, "grad_norm": 1.777114748954773, "learning_rate": 2.1938850927570687e-06, "loss": 0.7826, "step": 19789 }, { "epoch": 0.6990184953794666, "grad_norm": 1.8336341381072998, "learning_rate": 2.19341167876802e-06, "loss": 0.8222, "step": 19790 }, { "epoch": 0.6990538171831745, "grad_norm": 1.731731653213501, "learning_rate": 2.19293830151107e-06, "loss": 0.7515, "step": 19791 }, { "epoch": 0.6990891389868824, "grad_norm": 2.199965238571167, "learning_rate": 2.1924649609924153e-06, "loss": 0.8128, "step": 19792 }, { "epoch": 0.6991244607905903, "grad_norm": 1.7538397312164307, "learning_rate": 2.1919916572182538e-06, "loss": 0.7396, "step": 19793 }, { "epoch": 0.6991597825942982, "grad_norm": 1.8535231351852417, "learning_rate": 2.1915183901947758e-06, "loss": 0.768, "step": 19794 }, { "epoch": 0.699195104398006, "grad_norm": 0.8760334253311157, "learning_rate": 2.191045159928177e-06, "loss": 0.5758, "step": 19795 }, { "epoch": 0.6992304262017139, "grad_norm": 1.9733984470367432, "learning_rate": 2.1905719664246528e-06, "loss": 0.7567, "step": 19796 }, { "epoch": 0.6992657480054218, "grad_norm": 5.799838066101074, "learning_rate": 2.1900988096903927e-06, "loss": 0.7778, "step": 19797 }, { "epoch": 0.6993010698091298, "grad_norm": 1.95718252658844, "learning_rate": 2.1896256897315913e-06, "loss": 0.8014, "step": 19798 }, { "epoch": 0.6993363916128377, "grad_norm": 1.7229442596435547, "learning_rate": 2.1891526065544427e-06, "loss": 0.7767, "step": 19799 }, { "epoch": 0.6993717134165456, "grad_norm": 1.864462971687317, "learning_rate": 2.188679560165134e-06, "loss": 0.7858, "step": 19800 }, { "epoch": 0.6994070352202535, "grad_norm": 1.7234323024749756, "learning_rate": 2.188206550569859e-06, "loss": 0.7976, "step": 19801 }, { "epoch": 0.6994423570239614, "grad_norm": 1.589074730873108, "learning_rate": 2.187733577774809e-06, "loss": 0.7456, "step": 19802 }, { "epoch": 0.6994776788276693, "grad_norm": 1.6024118661880493, "learning_rate": 2.1872606417861737e-06, "loss": 0.775, "step": 19803 }, { "epoch": 0.6995130006313772, "grad_norm": 1.7499333620071411, "learning_rate": 2.1867877426101413e-06, "loss": 0.7428, "step": 19804 }, { "epoch": 0.6995483224350851, "grad_norm": 1.605221152305603, "learning_rate": 2.1863148802529023e-06, "loss": 0.7832, "step": 19805 }, { "epoch": 0.699583644238793, "grad_norm": 1.9248385429382324, "learning_rate": 2.1858420547206465e-06, "loss": 0.7777, "step": 19806 }, { "epoch": 0.699618966042501, "grad_norm": 2.126389980316162, "learning_rate": 2.185369266019561e-06, "loss": 0.7451, "step": 19807 }, { "epoch": 0.6996542878462089, "grad_norm": 1.9830818176269531, "learning_rate": 2.184896514155832e-06, "loss": 0.7494, "step": 19808 }, { "epoch": 0.6996896096499168, "grad_norm": 2.1701467037200928, "learning_rate": 2.18442379913565e-06, "loss": 0.8008, "step": 19809 }, { "epoch": 0.6997249314536247, "grad_norm": 1.7973554134368896, "learning_rate": 2.1839511209651978e-06, "loss": 0.7762, "step": 19810 }, { "epoch": 0.6997602532573326, "grad_norm": 1.805682897567749, "learning_rate": 2.1834784796506643e-06, "loss": 0.7485, "step": 19811 }, { "epoch": 0.6997955750610405, "grad_norm": 1.7232391834259033, "learning_rate": 2.183005875198235e-06, "loss": 0.7803, "step": 19812 }, { "epoch": 0.6998308968647484, "grad_norm": 1.593338966369629, "learning_rate": 2.1825333076140964e-06, "loss": 0.7654, "step": 19813 }, { "epoch": 0.6998662186684563, "grad_norm": 1.6794589757919312, "learning_rate": 2.182060776904431e-06, "loss": 0.7836, "step": 19814 }, { "epoch": 0.6999015404721642, "grad_norm": 1.8739538192749023, "learning_rate": 2.181588283075425e-06, "loss": 0.7438, "step": 19815 }, { "epoch": 0.6999368622758722, "grad_norm": 1.6160136461257935, "learning_rate": 2.1811158261332626e-06, "loss": 0.7471, "step": 19816 }, { "epoch": 0.6999721840795801, "grad_norm": 1.7089134454727173, "learning_rate": 2.1806434060841252e-06, "loss": 0.7612, "step": 19817 }, { "epoch": 0.700007505883288, "grad_norm": 1.6888692378997803, "learning_rate": 2.1801710229341965e-06, "loss": 0.7945, "step": 19818 }, { "epoch": 0.7000428276869959, "grad_norm": 1.7612230777740479, "learning_rate": 2.1796986766896618e-06, "loss": 0.7659, "step": 19819 }, { "epoch": 0.7000781494907038, "grad_norm": 1.9332349300384521, "learning_rate": 2.179226367356698e-06, "loss": 0.7518, "step": 19820 }, { "epoch": 0.7001134712944117, "grad_norm": 1.8130429983139038, "learning_rate": 2.1787540949414897e-06, "loss": 0.7917, "step": 19821 }, { "epoch": 0.7001487930981195, "grad_norm": 1.7578768730163574, "learning_rate": 2.17828185945022e-06, "loss": 0.7923, "step": 19822 }, { "epoch": 0.7001841149018274, "grad_norm": 1.6793882846832275, "learning_rate": 2.1778096608890636e-06, "loss": 0.7689, "step": 19823 }, { "epoch": 0.7002194367055353, "grad_norm": 1.819174885749817, "learning_rate": 2.177337499264205e-06, "loss": 0.783, "step": 19824 }, { "epoch": 0.7002547585092432, "grad_norm": 1.7070444822311401, "learning_rate": 2.1768653745818242e-06, "loss": 0.7801, "step": 19825 }, { "epoch": 0.7002900803129511, "grad_norm": 1.5287835597991943, "learning_rate": 2.176393286848098e-06, "loss": 0.7264, "step": 19826 }, { "epoch": 0.7003254021166591, "grad_norm": 1.7310653924942017, "learning_rate": 2.1759212360692044e-06, "loss": 0.7822, "step": 19827 }, { "epoch": 0.700360723920367, "grad_norm": 1.9382216930389404, "learning_rate": 2.175449222251323e-06, "loss": 0.7774, "step": 19828 }, { "epoch": 0.7003960457240749, "grad_norm": 1.934081792831421, "learning_rate": 2.174977245400633e-06, "loss": 0.8747, "step": 19829 }, { "epoch": 0.7004313675277828, "grad_norm": 2.0171048641204834, "learning_rate": 2.1745053055233077e-06, "loss": 0.7975, "step": 19830 }, { "epoch": 0.7004666893314907, "grad_norm": 1.688280463218689, "learning_rate": 2.174033402625526e-06, "loss": 0.763, "step": 19831 }, { "epoch": 0.7005020111351986, "grad_norm": 1.788772463798523, "learning_rate": 2.1735615367134656e-06, "loss": 0.7622, "step": 19832 }, { "epoch": 0.7005373329389065, "grad_norm": 1.5851768255233765, "learning_rate": 2.173089707793299e-06, "loss": 0.7549, "step": 19833 }, { "epoch": 0.7005726547426144, "grad_norm": 1.791730284690857, "learning_rate": 2.1726179158712022e-06, "loss": 0.812, "step": 19834 }, { "epoch": 0.7006079765463223, "grad_norm": 1.715821623802185, "learning_rate": 2.172146160953353e-06, "loss": 0.7924, "step": 19835 }, { "epoch": 0.7006432983500303, "grad_norm": 2.269139051437378, "learning_rate": 2.1716744430459215e-06, "loss": 0.7573, "step": 19836 }, { "epoch": 0.7006786201537382, "grad_norm": 1.8109773397445679, "learning_rate": 2.1712027621550825e-06, "loss": 0.7646, "step": 19837 }, { "epoch": 0.7007139419574461, "grad_norm": 1.6258039474487305, "learning_rate": 2.1707311182870123e-06, "loss": 0.7635, "step": 19838 }, { "epoch": 0.700749263761154, "grad_norm": 1.6390005350112915, "learning_rate": 2.1702595114478797e-06, "loss": 0.7874, "step": 19839 }, { "epoch": 0.7007845855648619, "grad_norm": 1.5686637163162231, "learning_rate": 2.1697879416438587e-06, "loss": 0.782, "step": 19840 }, { "epoch": 0.7008199073685698, "grad_norm": 1.6604849100112915, "learning_rate": 2.1693164088811226e-06, "loss": 0.7774, "step": 19841 }, { "epoch": 0.7008552291722777, "grad_norm": 1.8719758987426758, "learning_rate": 2.1688449131658394e-06, "loss": 0.751, "step": 19842 }, { "epoch": 0.7008905509759856, "grad_norm": 1.8145842552185059, "learning_rate": 2.1683734545041823e-06, "loss": 0.7498, "step": 19843 }, { "epoch": 0.7009258727796935, "grad_norm": 1.7036479711532593, "learning_rate": 2.167902032902323e-06, "loss": 0.779, "step": 19844 }, { "epoch": 0.7009611945834014, "grad_norm": 1.8002244234085083, "learning_rate": 2.1674306483664286e-06, "loss": 0.8294, "step": 19845 }, { "epoch": 0.7009965163871094, "grad_norm": 1.6311883926391602, "learning_rate": 2.1669593009026685e-06, "loss": 0.8045, "step": 19846 }, { "epoch": 0.7010318381908173, "grad_norm": 1.5737544298171997, "learning_rate": 2.166487990517212e-06, "loss": 0.7587, "step": 19847 }, { "epoch": 0.7010671599945251, "grad_norm": 2.0700268745422363, "learning_rate": 2.16601671721623e-06, "loss": 0.8108, "step": 19848 }, { "epoch": 0.701102481798233, "grad_norm": 1.5164644718170166, "learning_rate": 2.165545481005887e-06, "loss": 0.7697, "step": 19849 }, { "epoch": 0.7011378036019409, "grad_norm": 1.6063194274902344, "learning_rate": 2.1650742818923513e-06, "loss": 0.7468, "step": 19850 }, { "epoch": 0.7011731254056488, "grad_norm": 1.8091614246368408, "learning_rate": 2.164603119881792e-06, "loss": 0.8009, "step": 19851 }, { "epoch": 0.7012084472093567, "grad_norm": 1.6531829833984375, "learning_rate": 2.1641319949803737e-06, "loss": 0.7464, "step": 19852 }, { "epoch": 0.7012437690130646, "grad_norm": 1.6018023490905762, "learning_rate": 2.1636609071942614e-06, "loss": 0.785, "step": 19853 }, { "epoch": 0.7012790908167725, "grad_norm": 0.9487394094467163, "learning_rate": 2.1631898565296242e-06, "loss": 0.5851, "step": 19854 }, { "epoch": 0.7013144126204804, "grad_norm": 1.8881281614303589, "learning_rate": 2.162718842992624e-06, "loss": 0.8102, "step": 19855 }, { "epoch": 0.7013497344241884, "grad_norm": 1.8575495481491089, "learning_rate": 2.1622478665894253e-06, "loss": 0.7535, "step": 19856 }, { "epoch": 0.7013850562278963, "grad_norm": 1.7708306312561035, "learning_rate": 2.1617769273261946e-06, "loss": 0.7499, "step": 19857 }, { "epoch": 0.7014203780316042, "grad_norm": 1.7112513780593872, "learning_rate": 2.1613060252090927e-06, "loss": 0.8191, "step": 19858 }, { "epoch": 0.7014556998353121, "grad_norm": 1.6954418420791626, "learning_rate": 2.160835160244284e-06, "loss": 0.7649, "step": 19859 }, { "epoch": 0.70149102163902, "grad_norm": 1.6389011144638062, "learning_rate": 2.1603643324379307e-06, "loss": 0.7608, "step": 19860 }, { "epoch": 0.7015263434427279, "grad_norm": 0.8973472118377686, "learning_rate": 2.159893541796197e-06, "loss": 0.5878, "step": 19861 }, { "epoch": 0.7015616652464358, "grad_norm": 1.8494776487350464, "learning_rate": 2.1594227883252413e-06, "loss": 0.7858, "step": 19862 }, { "epoch": 0.7015969870501437, "grad_norm": 1.5843346118927002, "learning_rate": 2.1589520720312273e-06, "loss": 0.7783, "step": 19863 }, { "epoch": 0.7016323088538516, "grad_norm": 1.631974697113037, "learning_rate": 2.1584813929203153e-06, "loss": 0.7642, "step": 19864 }, { "epoch": 0.7016676306575595, "grad_norm": 1.7227977514266968, "learning_rate": 2.1580107509986626e-06, "loss": 0.7695, "step": 19865 }, { "epoch": 0.7017029524612675, "grad_norm": 2.023893356323242, "learning_rate": 2.157540146272431e-06, "loss": 0.7997, "step": 19866 }, { "epoch": 0.7017382742649754, "grad_norm": 1.6435786485671997, "learning_rate": 2.1570695787477815e-06, "loss": 0.787, "step": 19867 }, { "epoch": 0.7017735960686833, "grad_norm": 1.6114838123321533, "learning_rate": 2.1565990484308685e-06, "loss": 0.75, "step": 19868 }, { "epoch": 0.7018089178723912, "grad_norm": 1.584914207458496, "learning_rate": 2.1561285553278533e-06, "loss": 0.7593, "step": 19869 }, { "epoch": 0.7018442396760991, "grad_norm": 1.982027292251587, "learning_rate": 2.1556580994448927e-06, "loss": 0.7368, "step": 19870 }, { "epoch": 0.701879561479807, "grad_norm": 1.5406816005706787, "learning_rate": 2.1551876807881463e-06, "loss": 0.7627, "step": 19871 }, { "epoch": 0.7019148832835149, "grad_norm": 1.770136833190918, "learning_rate": 2.1547172993637664e-06, "loss": 0.79, "step": 19872 }, { "epoch": 0.7019502050872228, "grad_norm": 1.9028722047805786, "learning_rate": 2.1542469551779127e-06, "loss": 0.77, "step": 19873 }, { "epoch": 0.7019855268909306, "grad_norm": 1.6266214847564697, "learning_rate": 2.1537766482367407e-06, "loss": 0.7816, "step": 19874 }, { "epoch": 0.7020208486946385, "grad_norm": 0.9493948817253113, "learning_rate": 2.153306378546404e-06, "loss": 0.5555, "step": 19875 }, { "epoch": 0.7020561704983465, "grad_norm": 1.8717490434646606, "learning_rate": 2.1528361461130577e-06, "loss": 0.7505, "step": 19876 }, { "epoch": 0.7020914923020544, "grad_norm": 1.9167028665542603, "learning_rate": 2.152365950942858e-06, "loss": 0.7749, "step": 19877 }, { "epoch": 0.7021268141057623, "grad_norm": 1.8258758783340454, "learning_rate": 2.1518957930419565e-06, "loss": 0.7875, "step": 19878 }, { "epoch": 0.7021621359094702, "grad_norm": 2.1720354557037354, "learning_rate": 2.151425672416507e-06, "loss": 0.7726, "step": 19879 }, { "epoch": 0.7021974577131781, "grad_norm": 1.6253511905670166, "learning_rate": 2.150955589072665e-06, "loss": 0.7511, "step": 19880 }, { "epoch": 0.702232779516886, "grad_norm": 1.7918193340301514, "learning_rate": 2.1504855430165804e-06, "loss": 0.7517, "step": 19881 }, { "epoch": 0.7022681013205939, "grad_norm": 1.5722545385360718, "learning_rate": 2.1500155342544038e-06, "loss": 0.7613, "step": 19882 }, { "epoch": 0.7023034231243018, "grad_norm": 1.7282946109771729, "learning_rate": 2.1495455627922877e-06, "loss": 0.7335, "step": 19883 }, { "epoch": 0.7023387449280097, "grad_norm": 1.8866852521896362, "learning_rate": 2.1490756286363856e-06, "loss": 0.7903, "step": 19884 }, { "epoch": 0.7023740667317177, "grad_norm": 1.7576515674591064, "learning_rate": 2.148605731792844e-06, "loss": 0.7809, "step": 19885 }, { "epoch": 0.7024093885354256, "grad_norm": 1.7421125173568726, "learning_rate": 2.1481358722678144e-06, "loss": 0.7848, "step": 19886 }, { "epoch": 0.7024447103391335, "grad_norm": 1.6468279361724854, "learning_rate": 2.1476660500674485e-06, "loss": 0.7888, "step": 19887 }, { "epoch": 0.7024800321428414, "grad_norm": 2.0823614597320557, "learning_rate": 2.1471962651978913e-06, "loss": 0.7553, "step": 19888 }, { "epoch": 0.7025153539465493, "grad_norm": 0.9844979643821716, "learning_rate": 2.1467265176652924e-06, "loss": 0.6041, "step": 19889 }, { "epoch": 0.7025506757502572, "grad_norm": 1.730713129043579, "learning_rate": 2.1462568074758028e-06, "loss": 0.7711, "step": 19890 }, { "epoch": 0.7025859975539651, "grad_norm": 0.937706470489502, "learning_rate": 2.1457871346355665e-06, "loss": 0.5854, "step": 19891 }, { "epoch": 0.702621319357673, "grad_norm": 1.571552038192749, "learning_rate": 2.145317499150731e-06, "loss": 0.7601, "step": 19892 }, { "epoch": 0.7026566411613809, "grad_norm": 3.124983787536621, "learning_rate": 2.144847901027445e-06, "loss": 0.8032, "step": 19893 }, { "epoch": 0.7026919629650888, "grad_norm": 1.7024379968643188, "learning_rate": 2.1443783402718514e-06, "loss": 0.7865, "step": 19894 }, { "epoch": 0.7027272847687968, "grad_norm": 1.9357925653457642, "learning_rate": 2.1439088168900975e-06, "loss": 0.7409, "step": 19895 }, { "epoch": 0.7027626065725047, "grad_norm": 1.7545466423034668, "learning_rate": 2.1434393308883294e-06, "loss": 0.7685, "step": 19896 }, { "epoch": 0.7027979283762126, "grad_norm": 1.7258861064910889, "learning_rate": 2.142969882272689e-06, "loss": 0.7987, "step": 19897 }, { "epoch": 0.7028332501799205, "grad_norm": 1.5390535593032837, "learning_rate": 2.142500471049322e-06, "loss": 0.7643, "step": 19898 }, { "epoch": 0.7028685719836284, "grad_norm": 1.9814423322677612, "learning_rate": 2.142031097224373e-06, "loss": 0.7509, "step": 19899 }, { "epoch": 0.7029038937873362, "grad_norm": 1.7001125812530518, "learning_rate": 2.141561760803984e-06, "loss": 0.7659, "step": 19900 }, { "epoch": 0.7029392155910441, "grad_norm": 1.7208530902862549, "learning_rate": 2.141092461794295e-06, "loss": 0.7819, "step": 19901 }, { "epoch": 0.702974537394752, "grad_norm": 1.6849638223648071, "learning_rate": 2.140623200201451e-06, "loss": 0.7332, "step": 19902 }, { "epoch": 0.7030098591984599, "grad_norm": 1.8295223712921143, "learning_rate": 2.140153976031595e-06, "loss": 0.7848, "step": 19903 }, { "epoch": 0.7030451810021678, "grad_norm": 1.7822693586349487, "learning_rate": 2.139684789290864e-06, "loss": 0.7605, "step": 19904 }, { "epoch": 0.7030805028058758, "grad_norm": 1.6016614437103271, "learning_rate": 2.1392156399854007e-06, "loss": 0.7552, "step": 19905 }, { "epoch": 0.7031158246095837, "grad_norm": 1.7503539323806763, "learning_rate": 2.138746528121348e-06, "loss": 0.775, "step": 19906 }, { "epoch": 0.7031511464132916, "grad_norm": 1.6662179231643677, "learning_rate": 2.13827745370484e-06, "loss": 0.8131, "step": 19907 }, { "epoch": 0.7031864682169995, "grad_norm": 1.763147234916687, "learning_rate": 2.1378084167420188e-06, "loss": 0.8038, "step": 19908 }, { "epoch": 0.7032217900207074, "grad_norm": 1.6608824729919434, "learning_rate": 2.1373394172390255e-06, "loss": 0.767, "step": 19909 }, { "epoch": 0.7032571118244153, "grad_norm": 1.7075022459030151, "learning_rate": 2.1368704552019935e-06, "loss": 0.8211, "step": 19910 }, { "epoch": 0.7032924336281232, "grad_norm": 1.7926769256591797, "learning_rate": 2.1364015306370626e-06, "loss": 0.7852, "step": 19911 }, { "epoch": 0.7033277554318311, "grad_norm": 1.9420396089553833, "learning_rate": 2.1359326435503725e-06, "loss": 0.8052, "step": 19912 }, { "epoch": 0.703363077235539, "grad_norm": 1.8952420949935913, "learning_rate": 2.1354637939480556e-06, "loss": 0.7859, "step": 19913 }, { "epoch": 0.703398399039247, "grad_norm": 1.7632924318313599, "learning_rate": 2.1349949818362504e-06, "loss": 0.7702, "step": 19914 }, { "epoch": 0.7034337208429549, "grad_norm": 1.7017004489898682, "learning_rate": 2.1345262072210938e-06, "loss": 0.7676, "step": 19915 }, { "epoch": 0.7034690426466628, "grad_norm": 1.5445969104766846, "learning_rate": 2.1340574701087177e-06, "loss": 0.7484, "step": 19916 }, { "epoch": 0.7035043644503707, "grad_norm": 2.1410579681396484, "learning_rate": 2.1335887705052585e-06, "loss": 0.8072, "step": 19917 }, { "epoch": 0.7035396862540786, "grad_norm": 1.6848012208938599, "learning_rate": 2.133120108416853e-06, "loss": 0.7824, "step": 19918 }, { "epoch": 0.7035750080577865, "grad_norm": 1.7058751583099365, "learning_rate": 2.132651483849632e-06, "loss": 0.7857, "step": 19919 }, { "epoch": 0.7036103298614944, "grad_norm": 2.063122034072876, "learning_rate": 2.132182896809728e-06, "loss": 0.7737, "step": 19920 }, { "epoch": 0.7036456516652023, "grad_norm": 1.6215087175369263, "learning_rate": 2.1317143473032745e-06, "loss": 0.757, "step": 19921 }, { "epoch": 0.7036809734689102, "grad_norm": 1.7242584228515625, "learning_rate": 2.1312458353364067e-06, "loss": 0.7942, "step": 19922 }, { "epoch": 0.7037162952726181, "grad_norm": 1.670233130455017, "learning_rate": 2.1307773609152526e-06, "loss": 0.7801, "step": 19923 }, { "epoch": 0.703751617076326, "grad_norm": 3.391514778137207, "learning_rate": 2.130308924045945e-06, "loss": 0.7862, "step": 19924 }, { "epoch": 0.703786938880034, "grad_norm": 1.6742836236953735, "learning_rate": 2.1298405247346164e-06, "loss": 0.7768, "step": 19925 }, { "epoch": 0.7038222606837418, "grad_norm": 1.6869772672653198, "learning_rate": 2.129372162987394e-06, "loss": 0.7481, "step": 19926 }, { "epoch": 0.7038575824874497, "grad_norm": 0.931628406047821, "learning_rate": 2.1289038388104083e-06, "loss": 0.5921, "step": 19927 }, { "epoch": 0.7038929042911576, "grad_norm": 1.7328282594680786, "learning_rate": 2.1284355522097898e-06, "loss": 0.7642, "step": 19928 }, { "epoch": 0.7039282260948655, "grad_norm": 2.087254285812378, "learning_rate": 2.127967303191669e-06, "loss": 0.7561, "step": 19929 }, { "epoch": 0.7039635478985734, "grad_norm": 1.8577719926834106, "learning_rate": 2.1274990917621706e-06, "loss": 0.7712, "step": 19930 }, { "epoch": 0.7039988697022813, "grad_norm": 1.8720958232879639, "learning_rate": 2.127030917927424e-06, "loss": 0.8138, "step": 19931 }, { "epoch": 0.7040341915059892, "grad_norm": 1.699045181274414, "learning_rate": 2.126562781693558e-06, "loss": 0.7658, "step": 19932 }, { "epoch": 0.7040695133096971, "grad_norm": 1.7536721229553223, "learning_rate": 2.126094683066697e-06, "loss": 0.7594, "step": 19933 }, { "epoch": 0.704104835113405, "grad_norm": 1.8104093074798584, "learning_rate": 2.125626622052968e-06, "loss": 0.7586, "step": 19934 }, { "epoch": 0.704140156917113, "grad_norm": 1.621056318283081, "learning_rate": 2.1251585986585e-06, "loss": 0.7867, "step": 19935 }, { "epoch": 0.7041754787208209, "grad_norm": 1.6482434272766113, "learning_rate": 2.1246906128894134e-06, "loss": 0.7671, "step": 19936 }, { "epoch": 0.7042108005245288, "grad_norm": 1.618495225906372, "learning_rate": 2.124222664751838e-06, "loss": 0.7834, "step": 19937 }, { "epoch": 0.7042461223282367, "grad_norm": 1.7541593313217163, "learning_rate": 2.123754754251893e-06, "loss": 0.8025, "step": 19938 }, { "epoch": 0.7042814441319446, "grad_norm": 1.9388309717178345, "learning_rate": 2.1232868813957073e-06, "loss": 0.7803, "step": 19939 }, { "epoch": 0.7043167659356525, "grad_norm": 1.5992361307144165, "learning_rate": 2.1228190461894005e-06, "loss": 0.739, "step": 19940 }, { "epoch": 0.7043520877393604, "grad_norm": 1.6247175931930542, "learning_rate": 2.1223512486390973e-06, "loss": 0.7937, "step": 19941 }, { "epoch": 0.7043874095430683, "grad_norm": 1.5487598180770874, "learning_rate": 2.1218834887509216e-06, "loss": 0.7837, "step": 19942 }, { "epoch": 0.7044227313467762, "grad_norm": 1.7768502235412598, "learning_rate": 2.121415766530992e-06, "loss": 0.7773, "step": 19943 }, { "epoch": 0.7044580531504842, "grad_norm": 1.7031099796295166, "learning_rate": 2.1209480819854315e-06, "loss": 0.7434, "step": 19944 }, { "epoch": 0.7044933749541921, "grad_norm": 1.8780982494354248, "learning_rate": 2.120480435120364e-06, "loss": 0.7359, "step": 19945 }, { "epoch": 0.7045286967579, "grad_norm": 3.2313449382781982, "learning_rate": 2.1200128259419043e-06, "loss": 0.7649, "step": 19946 }, { "epoch": 0.7045640185616079, "grad_norm": 1.6621077060699463, "learning_rate": 2.119545254456176e-06, "loss": 0.7419, "step": 19947 }, { "epoch": 0.7045993403653158, "grad_norm": 2.120511770248413, "learning_rate": 2.1190777206693e-06, "loss": 0.777, "step": 19948 }, { "epoch": 0.7046346621690237, "grad_norm": 1.7848916053771973, "learning_rate": 2.118610224587391e-06, "loss": 0.7873, "step": 19949 }, { "epoch": 0.7046699839727316, "grad_norm": 2.0765984058380127, "learning_rate": 2.11814276621657e-06, "loss": 0.7769, "step": 19950 }, { "epoch": 0.7047053057764395, "grad_norm": 1.7296024560928345, "learning_rate": 2.117675345562956e-06, "loss": 0.8124, "step": 19951 }, { "epoch": 0.7047406275801473, "grad_norm": 1.6435502767562866, "learning_rate": 2.1172079626326643e-06, "loss": 0.7723, "step": 19952 }, { "epoch": 0.7047759493838552, "grad_norm": 1.6074435710906982, "learning_rate": 2.116740617431812e-06, "loss": 0.7865, "step": 19953 }, { "epoch": 0.7048112711875631, "grad_norm": 1.9615442752838135, "learning_rate": 2.116273309966519e-06, "loss": 0.7709, "step": 19954 }, { "epoch": 0.7048465929912711, "grad_norm": 1.6900146007537842, "learning_rate": 2.115806040242898e-06, "loss": 0.7612, "step": 19955 }, { "epoch": 0.704881914794979, "grad_norm": 1.6715939044952393, "learning_rate": 2.1153388082670645e-06, "loss": 0.7851, "step": 19956 }, { "epoch": 0.7049172365986869, "grad_norm": 1.6008661985397339, "learning_rate": 2.114871614045134e-06, "loss": 0.7562, "step": 19957 }, { "epoch": 0.7049525584023948, "grad_norm": 1.654877781867981, "learning_rate": 2.1144044575832233e-06, "loss": 0.8102, "step": 19958 }, { "epoch": 0.7049878802061027, "grad_norm": 1.622884750366211, "learning_rate": 2.113937338887443e-06, "loss": 0.7795, "step": 19959 }, { "epoch": 0.7050232020098106, "grad_norm": 1.7084341049194336, "learning_rate": 2.1134702579639084e-06, "loss": 0.7701, "step": 19960 }, { "epoch": 0.7050585238135185, "grad_norm": 1.9552372694015503, "learning_rate": 2.1130032148187335e-06, "loss": 0.7415, "step": 19961 }, { "epoch": 0.7050938456172264, "grad_norm": 1.5403834581375122, "learning_rate": 2.112536209458029e-06, "loss": 0.7692, "step": 19962 }, { "epoch": 0.7051291674209343, "grad_norm": 1.7437288761138916, "learning_rate": 2.1120692418879067e-06, "loss": 0.7486, "step": 19963 }, { "epoch": 0.7051644892246423, "grad_norm": 1.74844491481781, "learning_rate": 2.1116023121144817e-06, "loss": 0.73, "step": 19964 }, { "epoch": 0.7051998110283502, "grad_norm": 1.612088680267334, "learning_rate": 2.111135420143861e-06, "loss": 0.7375, "step": 19965 }, { "epoch": 0.7052351328320581, "grad_norm": 1.9011564254760742, "learning_rate": 2.110668565982157e-06, "loss": 0.8074, "step": 19966 }, { "epoch": 0.705270454635766, "grad_norm": 1.8237061500549316, "learning_rate": 2.110201749635481e-06, "loss": 0.7912, "step": 19967 }, { "epoch": 0.7053057764394739, "grad_norm": 1.8600398302078247, "learning_rate": 2.1097349711099396e-06, "loss": 0.8028, "step": 19968 }, { "epoch": 0.7053410982431818, "grad_norm": 1.660291314125061, "learning_rate": 2.109268230411644e-06, "loss": 0.7462, "step": 19969 }, { "epoch": 0.7053764200468897, "grad_norm": 1.8444206714630127, "learning_rate": 2.108801527546704e-06, "loss": 0.768, "step": 19970 }, { "epoch": 0.7054117418505976, "grad_norm": 1.6064237356185913, "learning_rate": 2.108334862521224e-06, "loss": 0.7866, "step": 19971 }, { "epoch": 0.7054470636543055, "grad_norm": 1.749489426612854, "learning_rate": 2.107868235341314e-06, "loss": 0.765, "step": 19972 }, { "epoch": 0.7054823854580135, "grad_norm": 1.6141330003738403, "learning_rate": 2.1074016460130826e-06, "loss": 0.7647, "step": 19973 }, { "epoch": 0.7055177072617214, "grad_norm": 1.678751826286316, "learning_rate": 2.106935094542635e-06, "loss": 0.7618, "step": 19974 }, { "epoch": 0.7055530290654293, "grad_norm": 1.6619224548339844, "learning_rate": 2.1064685809360748e-06, "loss": 0.731, "step": 19975 }, { "epoch": 0.7055883508691372, "grad_norm": 1.8848658800125122, "learning_rate": 2.10600210519951e-06, "loss": 0.7725, "step": 19976 }, { "epoch": 0.7056236726728451, "grad_norm": 0.8629989624023438, "learning_rate": 2.1055356673390477e-06, "loss": 0.5661, "step": 19977 }, { "epoch": 0.7056589944765529, "grad_norm": 1.7485406398773193, "learning_rate": 2.1050692673607886e-06, "loss": 0.7837, "step": 19978 }, { "epoch": 0.7056943162802608, "grad_norm": 1.6781917810440063, "learning_rate": 2.1046029052708384e-06, "loss": 0.8261, "step": 19979 }, { "epoch": 0.7057296380839687, "grad_norm": 1.678458333015442, "learning_rate": 2.1041365810753033e-06, "loss": 0.7063, "step": 19980 }, { "epoch": 0.7057649598876766, "grad_norm": 1.8330754041671753, "learning_rate": 2.1036702947802824e-06, "loss": 0.77, "step": 19981 }, { "epoch": 0.7058002816913845, "grad_norm": 1.7598752975463867, "learning_rate": 2.1032040463918803e-06, "loss": 0.8059, "step": 19982 }, { "epoch": 0.7058356034950924, "grad_norm": 1.9467321634292603, "learning_rate": 2.1027378359162003e-06, "loss": 0.7813, "step": 19983 }, { "epoch": 0.7058709252988004, "grad_norm": 1.9945790767669678, "learning_rate": 2.1022716633593414e-06, "loss": 0.7563, "step": 19984 }, { "epoch": 0.7059062471025083, "grad_norm": 1.9255797863006592, "learning_rate": 2.101805528727407e-06, "loss": 0.7596, "step": 19985 }, { "epoch": 0.7059415689062162, "grad_norm": 1.7975523471832275, "learning_rate": 2.1013394320264965e-06, "loss": 0.7565, "step": 19986 }, { "epoch": 0.7059768907099241, "grad_norm": 1.022788643836975, "learning_rate": 2.100873373262712e-06, "loss": 0.5641, "step": 19987 }, { "epoch": 0.706012212513632, "grad_norm": 1.6436312198638916, "learning_rate": 2.10040735244215e-06, "loss": 0.7708, "step": 19988 }, { "epoch": 0.7060475343173399, "grad_norm": 1.7076706886291504, "learning_rate": 2.099941369570912e-06, "loss": 0.7617, "step": 19989 }, { "epoch": 0.7060828561210478, "grad_norm": 1.6943867206573486, "learning_rate": 2.099475424655098e-06, "loss": 0.7323, "step": 19990 }, { "epoch": 0.7061181779247557, "grad_norm": 2.019129991531372, "learning_rate": 2.0990095177008023e-06, "loss": 0.8102, "step": 19991 }, { "epoch": 0.7061534997284636, "grad_norm": 4.385367393493652, "learning_rate": 2.098543648714127e-06, "loss": 0.766, "step": 19992 }, { "epoch": 0.7061888215321716, "grad_norm": 1.9001189470291138, "learning_rate": 2.098077817701167e-06, "loss": 0.7951, "step": 19993 }, { "epoch": 0.7062241433358795, "grad_norm": 1.67922842502594, "learning_rate": 2.0976120246680164e-06, "loss": 0.7605, "step": 19994 }, { "epoch": 0.7062594651395874, "grad_norm": 1.7501317262649536, "learning_rate": 2.0971462696207746e-06, "loss": 0.7585, "step": 19995 }, { "epoch": 0.7062947869432953, "grad_norm": 1.9007974863052368, "learning_rate": 2.096680552565537e-06, "loss": 0.7565, "step": 19996 }, { "epoch": 0.7063301087470032, "grad_norm": 1.640034556388855, "learning_rate": 2.0962148735083998e-06, "loss": 0.7587, "step": 19997 }, { "epoch": 0.7063654305507111, "grad_norm": 1.6319048404693604, "learning_rate": 2.095749232455455e-06, "loss": 0.7635, "step": 19998 }, { "epoch": 0.706400752354419, "grad_norm": 1.7259308099746704, "learning_rate": 2.095283629412798e-06, "loss": 0.766, "step": 19999 }, { "epoch": 0.7064360741581269, "grad_norm": 1.8573143482208252, "learning_rate": 2.094818064386525e-06, "loss": 0.7478, "step": 20000 }, { "epoch": 0.7064713959618348, "grad_norm": 1.826478123664856, "learning_rate": 2.094352537382725e-06, "loss": 0.7736, "step": 20001 }, { "epoch": 0.7065067177655427, "grad_norm": 1.926072597503662, "learning_rate": 2.093887048407493e-06, "loss": 0.7813, "step": 20002 }, { "epoch": 0.7065420395692507, "grad_norm": 1.6875284910202026, "learning_rate": 2.093421597466923e-06, "loss": 0.7928, "step": 20003 }, { "epoch": 0.7065773613729585, "grad_norm": 1.781915545463562, "learning_rate": 2.0929561845671025e-06, "loss": 0.7619, "step": 20004 }, { "epoch": 0.7066126831766664, "grad_norm": 1.7919385433197021, "learning_rate": 2.0924908097141257e-06, "loss": 0.7966, "step": 20005 }, { "epoch": 0.7066480049803743, "grad_norm": 1.7371306419372559, "learning_rate": 2.0920254729140838e-06, "loss": 0.769, "step": 20006 }, { "epoch": 0.7066833267840822, "grad_norm": 1.6059937477111816, "learning_rate": 2.091560174173064e-06, "loss": 0.7718, "step": 20007 }, { "epoch": 0.7067186485877901, "grad_norm": 1.6758317947387695, "learning_rate": 2.091094913497158e-06, "loss": 0.7512, "step": 20008 }, { "epoch": 0.706753970391498, "grad_norm": 2.2983553409576416, "learning_rate": 2.0906296908924566e-06, "loss": 0.7496, "step": 20009 }, { "epoch": 0.7067892921952059, "grad_norm": 1.7068283557891846, "learning_rate": 2.0901645063650476e-06, "loss": 0.7704, "step": 20010 }, { "epoch": 0.7068246139989138, "grad_norm": 1.5720884799957275, "learning_rate": 2.089699359921016e-06, "loss": 0.7488, "step": 20011 }, { "epoch": 0.7068599358026217, "grad_norm": 1.6333459615707397, "learning_rate": 2.0892342515664525e-06, "loss": 0.7877, "step": 20012 }, { "epoch": 0.7068952576063297, "grad_norm": 1.7512454986572266, "learning_rate": 2.0887691813074456e-06, "loss": 0.7469, "step": 20013 }, { "epoch": 0.7069305794100376, "grad_norm": 1.6840450763702393, "learning_rate": 2.0883041491500787e-06, "loss": 0.8072, "step": 20014 }, { "epoch": 0.7069659012137455, "grad_norm": 1.7511907815933228, "learning_rate": 2.0878391551004403e-06, "loss": 0.7856, "step": 20015 }, { "epoch": 0.7070012230174534, "grad_norm": 1.6022891998291016, "learning_rate": 2.087374199164617e-06, "loss": 0.7608, "step": 20016 }, { "epoch": 0.7070365448211613, "grad_norm": 1.8998212814331055, "learning_rate": 2.0869092813486904e-06, "loss": 0.8116, "step": 20017 }, { "epoch": 0.7070718666248692, "grad_norm": 1.7278556823730469, "learning_rate": 2.0864444016587478e-06, "loss": 0.7641, "step": 20018 }, { "epoch": 0.7071071884285771, "grad_norm": 1.696853518486023, "learning_rate": 2.085979560100875e-06, "loss": 0.7484, "step": 20019 }, { "epoch": 0.707142510232285, "grad_norm": 1.8142648935317993, "learning_rate": 2.0855147566811525e-06, "loss": 0.8202, "step": 20020 }, { "epoch": 0.7071778320359929, "grad_norm": 1.6845879554748535, "learning_rate": 2.0850499914056645e-06, "loss": 0.7548, "step": 20021 }, { "epoch": 0.7072131538397008, "grad_norm": 1.893673062324524, "learning_rate": 2.084585264280496e-06, "loss": 0.7864, "step": 20022 }, { "epoch": 0.7072484756434088, "grad_norm": 1.6653900146484375, "learning_rate": 2.084120575311726e-06, "loss": 0.7497, "step": 20023 }, { "epoch": 0.7072837974471167, "grad_norm": 2.049021005630493, "learning_rate": 2.083655924505438e-06, "loss": 0.79, "step": 20024 }, { "epoch": 0.7073191192508246, "grad_norm": 1.7321220636367798, "learning_rate": 2.0831913118677143e-06, "loss": 0.7703, "step": 20025 }, { "epoch": 0.7073544410545325, "grad_norm": 1.535502552986145, "learning_rate": 2.082726737404633e-06, "loss": 0.7537, "step": 20026 }, { "epoch": 0.7073897628582404, "grad_norm": 1.7827800512313843, "learning_rate": 2.0822622011222755e-06, "loss": 0.7802, "step": 20027 }, { "epoch": 0.7074250846619483, "grad_norm": 2.178736448287964, "learning_rate": 2.081797703026724e-06, "loss": 0.7904, "step": 20028 }, { "epoch": 0.7074604064656562, "grad_norm": 1.656659722328186, "learning_rate": 2.081333243124055e-06, "loss": 0.7488, "step": 20029 }, { "epoch": 0.707495728269364, "grad_norm": 1.6869906187057495, "learning_rate": 2.0808688214203466e-06, "loss": 0.7592, "step": 20030 }, { "epoch": 0.7075310500730719, "grad_norm": 1.7622569799423218, "learning_rate": 2.080404437921678e-06, "loss": 0.774, "step": 20031 }, { "epoch": 0.7075663718767798, "grad_norm": 1.8642969131469727, "learning_rate": 2.0799400926341295e-06, "loss": 0.7668, "step": 20032 }, { "epoch": 0.7076016936804878, "grad_norm": 1.6557676792144775, "learning_rate": 2.0794757855637744e-06, "loss": 0.7661, "step": 20033 }, { "epoch": 0.7076370154841957, "grad_norm": 1.6019871234893799, "learning_rate": 2.079011516716691e-06, "loss": 0.7723, "step": 20034 }, { "epoch": 0.7076723372879036, "grad_norm": 1.5622249841690063, "learning_rate": 2.0785472860989575e-06, "loss": 0.7447, "step": 20035 }, { "epoch": 0.7077076590916115, "grad_norm": 1.6536086797714233, "learning_rate": 2.078083093716646e-06, "loss": 0.7581, "step": 20036 }, { "epoch": 0.7077429808953194, "grad_norm": 1.670629858970642, "learning_rate": 2.0776189395758338e-06, "loss": 0.7522, "step": 20037 }, { "epoch": 0.7077783026990273, "grad_norm": 1.5537099838256836, "learning_rate": 2.077154823682598e-06, "loss": 0.7744, "step": 20038 }, { "epoch": 0.7078136245027352, "grad_norm": 1.7688568830490112, "learning_rate": 2.0766907460430073e-06, "loss": 0.8127, "step": 20039 }, { "epoch": 0.7078489463064431, "grad_norm": 1.7206315994262695, "learning_rate": 2.0762267066631398e-06, "loss": 0.8153, "step": 20040 }, { "epoch": 0.707884268110151, "grad_norm": 1.7051419019699097, "learning_rate": 2.0757627055490694e-06, "loss": 0.7822, "step": 20041 }, { "epoch": 0.707919589913859, "grad_norm": 0.8991140127182007, "learning_rate": 2.0752987427068644e-06, "loss": 0.5528, "step": 20042 }, { "epoch": 0.7079549117175669, "grad_norm": 1.6846704483032227, "learning_rate": 2.0748348181425996e-06, "loss": 0.7689, "step": 20043 }, { "epoch": 0.7079902335212748, "grad_norm": 1.720481514930725, "learning_rate": 2.074370931862348e-06, "loss": 0.7771, "step": 20044 }, { "epoch": 0.7080255553249827, "grad_norm": 1.777227759361267, "learning_rate": 2.0739070838721802e-06, "loss": 0.8156, "step": 20045 }, { "epoch": 0.7080608771286906, "grad_norm": 1.7096997499465942, "learning_rate": 2.0734432741781653e-06, "loss": 0.7149, "step": 20046 }, { "epoch": 0.7080961989323985, "grad_norm": 1.7080477476119995, "learning_rate": 2.0729795027863764e-06, "loss": 0.7692, "step": 20047 }, { "epoch": 0.7081315207361064, "grad_norm": 1.913365364074707, "learning_rate": 2.0725157697028815e-06, "loss": 0.7904, "step": 20048 }, { "epoch": 0.7081668425398143, "grad_norm": 1.6739990711212158, "learning_rate": 2.072052074933748e-06, "loss": 0.7983, "step": 20049 }, { "epoch": 0.7082021643435222, "grad_norm": 1.629396677017212, "learning_rate": 2.071588418485047e-06, "loss": 0.7409, "step": 20050 }, { "epoch": 0.7082374861472301, "grad_norm": 1.650530457496643, "learning_rate": 2.0711248003628474e-06, "loss": 0.7291, "step": 20051 }, { "epoch": 0.708272807950938, "grad_norm": 1.8952000141143799, "learning_rate": 2.0706612205732146e-06, "loss": 0.7764, "step": 20052 }, { "epoch": 0.708308129754646, "grad_norm": 1.7976374626159668, "learning_rate": 2.0701976791222174e-06, "loss": 0.7725, "step": 20053 }, { "epoch": 0.7083434515583539, "grad_norm": 2.2714688777923584, "learning_rate": 2.069734176015921e-06, "loss": 0.7506, "step": 20054 }, { "epoch": 0.7083787733620618, "grad_norm": 1.5972905158996582, "learning_rate": 2.069270711260396e-06, "loss": 0.753, "step": 20055 }, { "epoch": 0.7084140951657696, "grad_norm": 1.6675989627838135, "learning_rate": 2.0688072848617024e-06, "loss": 0.7845, "step": 20056 }, { "epoch": 0.7084494169694775, "grad_norm": 0.9649953842163086, "learning_rate": 2.0683438968259085e-06, "loss": 0.5926, "step": 20057 }, { "epoch": 0.7084847387731854, "grad_norm": 1.7214856147766113, "learning_rate": 2.06788054715908e-06, "loss": 0.7489, "step": 20058 }, { "epoch": 0.7085200605768933, "grad_norm": 2.082864999771118, "learning_rate": 2.0674172358672786e-06, "loss": 0.7675, "step": 20059 }, { "epoch": 0.7085553823806012, "grad_norm": 1.7544128894805908, "learning_rate": 2.0669539629565682e-06, "loss": 0.7756, "step": 20060 }, { "epoch": 0.7085907041843091, "grad_norm": 1.5779719352722168, "learning_rate": 2.0664907284330154e-06, "loss": 0.7618, "step": 20061 }, { "epoch": 0.708626025988017, "grad_norm": 1.6813244819641113, "learning_rate": 2.0660275323026785e-06, "loss": 0.7669, "step": 20062 }, { "epoch": 0.708661347791725, "grad_norm": 1.5806589126586914, "learning_rate": 2.0655643745716218e-06, "loss": 0.7627, "step": 20063 }, { "epoch": 0.7086966695954329, "grad_norm": 1.6706525087356567, "learning_rate": 2.065101255245909e-06, "loss": 0.8254, "step": 20064 }, { "epoch": 0.7087319913991408, "grad_norm": 2.886977195739746, "learning_rate": 2.0646381743315968e-06, "loss": 0.7694, "step": 20065 }, { "epoch": 0.7087673132028487, "grad_norm": 2.1622326374053955, "learning_rate": 2.0641751318347507e-06, "loss": 0.7825, "step": 20066 }, { "epoch": 0.7088026350065566, "grad_norm": 1.7862352132797241, "learning_rate": 2.063712127761426e-06, "loss": 0.7936, "step": 20067 }, { "epoch": 0.7088379568102645, "grad_norm": 0.944640576839447, "learning_rate": 2.0632491621176883e-06, "loss": 0.5823, "step": 20068 }, { "epoch": 0.7088732786139724, "grad_norm": 1.7884269952774048, "learning_rate": 2.0627862349095908e-06, "loss": 0.7626, "step": 20069 }, { "epoch": 0.7089086004176803, "grad_norm": 1.6319348812103271, "learning_rate": 2.062323346143195e-06, "loss": 0.7907, "step": 20070 }, { "epoch": 0.7089439222213882, "grad_norm": 1.8314157724380493, "learning_rate": 2.061860495824561e-06, "loss": 0.7494, "step": 20071 }, { "epoch": 0.7089792440250962, "grad_norm": 1.4934849739074707, "learning_rate": 2.061397683959742e-06, "loss": 0.7678, "step": 20072 }, { "epoch": 0.7090145658288041, "grad_norm": 1.7064697742462158, "learning_rate": 2.0609349105547988e-06, "loss": 0.7978, "step": 20073 }, { "epoch": 0.709049887632512, "grad_norm": 1.7503348588943481, "learning_rate": 2.060472175615788e-06, "loss": 0.7616, "step": 20074 }, { "epoch": 0.7090852094362199, "grad_norm": 1.6125333309173584, "learning_rate": 2.0600094791487635e-06, "loss": 0.7234, "step": 20075 }, { "epoch": 0.7091205312399278, "grad_norm": 1.6243748664855957, "learning_rate": 2.059546821159782e-06, "loss": 0.7559, "step": 20076 }, { "epoch": 0.7091558530436357, "grad_norm": 1.7388135194778442, "learning_rate": 2.0590842016549005e-06, "loss": 0.7263, "step": 20077 }, { "epoch": 0.7091911748473436, "grad_norm": 1.7164831161499023, "learning_rate": 2.058621620640171e-06, "loss": 0.7716, "step": 20078 }, { "epoch": 0.7092264966510515, "grad_norm": 1.91934335231781, "learning_rate": 2.058159078121649e-06, "loss": 0.7689, "step": 20079 }, { "epoch": 0.7092618184547594, "grad_norm": 1.8599565029144287, "learning_rate": 2.057696574105389e-06, "loss": 0.8019, "step": 20080 }, { "epoch": 0.7092971402584674, "grad_norm": 1.6357629299163818, "learning_rate": 2.057234108597441e-06, "loss": 0.7815, "step": 20081 }, { "epoch": 0.7093324620621752, "grad_norm": 2.822460174560547, "learning_rate": 2.056771681603861e-06, "loss": 0.8324, "step": 20082 }, { "epoch": 0.7093677838658831, "grad_norm": 1.6153643131256104, "learning_rate": 2.056309293130701e-06, "loss": 0.752, "step": 20083 }, { "epoch": 0.709403105669591, "grad_norm": 2.092059850692749, "learning_rate": 2.0558469431840116e-06, "loss": 0.7647, "step": 20084 }, { "epoch": 0.7094384274732989, "grad_norm": 1.5908201932907104, "learning_rate": 2.055384631769842e-06, "loss": 0.7833, "step": 20085 }, { "epoch": 0.7094737492770068, "grad_norm": 1.9290858507156372, "learning_rate": 2.0549223588942457e-06, "loss": 0.7584, "step": 20086 }, { "epoch": 0.7095090710807147, "grad_norm": 1.7905070781707764, "learning_rate": 2.0544601245632733e-06, "loss": 0.7984, "step": 20087 }, { "epoch": 0.7095443928844226, "grad_norm": 1.804880142211914, "learning_rate": 2.053997928782971e-06, "loss": 0.7685, "step": 20088 }, { "epoch": 0.7095797146881305, "grad_norm": 1.788068175315857, "learning_rate": 2.05353577155939e-06, "loss": 0.7731, "step": 20089 }, { "epoch": 0.7096150364918384, "grad_norm": 1.7704075574874878, "learning_rate": 2.0530736528985813e-06, "loss": 0.7678, "step": 20090 }, { "epoch": 0.7096503582955463, "grad_norm": 2.1113643646240234, "learning_rate": 2.052611572806589e-06, "loss": 0.7788, "step": 20091 }, { "epoch": 0.7096856800992543, "grad_norm": 1.9400206804275513, "learning_rate": 2.0521495312894612e-06, "loss": 0.7799, "step": 20092 }, { "epoch": 0.7097210019029622, "grad_norm": 1.623116135597229, "learning_rate": 2.051687528353249e-06, "loss": 0.7608, "step": 20093 }, { "epoch": 0.7097563237066701, "grad_norm": 1.647144079208374, "learning_rate": 2.051225564003994e-06, "loss": 0.7553, "step": 20094 }, { "epoch": 0.709791645510378, "grad_norm": 1.6576846837997437, "learning_rate": 2.050763638247745e-06, "loss": 0.7544, "step": 20095 }, { "epoch": 0.7098269673140859, "grad_norm": 1.638330340385437, "learning_rate": 2.050301751090548e-06, "loss": 0.795, "step": 20096 }, { "epoch": 0.7098622891177938, "grad_norm": 1.6107056140899658, "learning_rate": 2.0498399025384465e-06, "loss": 0.7934, "step": 20097 }, { "epoch": 0.7098976109215017, "grad_norm": 1.8512927293777466, "learning_rate": 2.0493780925974847e-06, "loss": 0.8069, "step": 20098 }, { "epoch": 0.7099329327252096, "grad_norm": 1.7246254682540894, "learning_rate": 2.0489163212737104e-06, "loss": 0.7941, "step": 20099 }, { "epoch": 0.7099682545289175, "grad_norm": 1.772995948791504, "learning_rate": 2.0484545885731626e-06, "loss": 0.7621, "step": 20100 }, { "epoch": 0.7100035763326255, "grad_norm": 1.8850117921829224, "learning_rate": 2.047992894501886e-06, "loss": 0.7866, "step": 20101 }, { "epoch": 0.7100388981363334, "grad_norm": 1.78982412815094, "learning_rate": 2.0475312390659235e-06, "loss": 0.7833, "step": 20102 }, { "epoch": 0.7100742199400413, "grad_norm": 1.7242757081985474, "learning_rate": 2.047069622271321e-06, "loss": 0.7884, "step": 20103 }, { "epoch": 0.7101095417437492, "grad_norm": 1.6435937881469727, "learning_rate": 2.0466080441241128e-06, "loss": 0.7613, "step": 20104 }, { "epoch": 0.7101448635474571, "grad_norm": 1.73508620262146, "learning_rate": 2.046146504630343e-06, "loss": 0.7567, "step": 20105 }, { "epoch": 0.710180185351165, "grad_norm": 1.5757805109024048, "learning_rate": 2.045685003796054e-06, "loss": 0.7437, "step": 20106 }, { "epoch": 0.7102155071548729, "grad_norm": 1.562982439994812, "learning_rate": 2.045223541627283e-06, "loss": 0.7638, "step": 20107 }, { "epoch": 0.7102508289585807, "grad_norm": 1.7424205541610718, "learning_rate": 2.0447621181300707e-06, "loss": 0.7704, "step": 20108 }, { "epoch": 0.7102861507622886, "grad_norm": 1.6630090475082397, "learning_rate": 2.044300733310458e-06, "loss": 0.7624, "step": 20109 }, { "epoch": 0.7103214725659965, "grad_norm": 1.7077640295028687, "learning_rate": 2.04383938717448e-06, "loss": 0.7986, "step": 20110 }, { "epoch": 0.7103567943697044, "grad_norm": 1.7773414850234985, "learning_rate": 2.0433780797281764e-06, "loss": 0.7988, "step": 20111 }, { "epoch": 0.7103921161734124, "grad_norm": 1.7049243450164795, "learning_rate": 2.042916810977585e-06, "loss": 0.7787, "step": 20112 }, { "epoch": 0.7104274379771203, "grad_norm": 1.680288553237915, "learning_rate": 2.0424555809287434e-06, "loss": 0.7807, "step": 20113 }, { "epoch": 0.7104627597808282, "grad_norm": 1.6407054662704468, "learning_rate": 2.041994389587686e-06, "loss": 0.7753, "step": 20114 }, { "epoch": 0.7104980815845361, "grad_norm": 1.7260702848434448, "learning_rate": 2.04153323696045e-06, "loss": 0.7689, "step": 20115 }, { "epoch": 0.710533403388244, "grad_norm": 1.7673895359039307, "learning_rate": 2.041072123053073e-06, "loss": 0.7494, "step": 20116 }, { "epoch": 0.7105687251919519, "grad_norm": 1.9765069484710693, "learning_rate": 2.040611047871585e-06, "loss": 0.7912, "step": 20117 }, { "epoch": 0.7106040469956598, "grad_norm": 1.674041748046875, "learning_rate": 2.040150011422024e-06, "loss": 0.7361, "step": 20118 }, { "epoch": 0.7106393687993677, "grad_norm": 1.5140081644058228, "learning_rate": 2.039689013710425e-06, "loss": 0.741, "step": 20119 }, { "epoch": 0.7106746906030756, "grad_norm": 1.64520263671875, "learning_rate": 2.0392280547428185e-06, "loss": 0.772, "step": 20120 }, { "epoch": 0.7107100124067836, "grad_norm": 1.7091608047485352, "learning_rate": 2.0387671345252395e-06, "loss": 0.7405, "step": 20121 }, { "epoch": 0.7107453342104915, "grad_norm": 1.5861715078353882, "learning_rate": 2.0383062530637182e-06, "loss": 0.7632, "step": 20122 }, { "epoch": 0.7107806560141994, "grad_norm": 1.6485501527786255, "learning_rate": 2.0378454103642898e-06, "loss": 0.7929, "step": 20123 }, { "epoch": 0.7108159778179073, "grad_norm": 1.7557799816131592, "learning_rate": 2.0373846064329817e-06, "loss": 0.7897, "step": 20124 }, { "epoch": 0.7108512996216152, "grad_norm": 1.8122011423110962, "learning_rate": 2.0369238412758273e-06, "loss": 0.7658, "step": 20125 }, { "epoch": 0.7108866214253231, "grad_norm": 1.7539838552474976, "learning_rate": 2.0364631148988584e-06, "loss": 0.7636, "step": 20126 }, { "epoch": 0.710921943229031, "grad_norm": 1.6063649654388428, "learning_rate": 2.0360024273081015e-06, "loss": 0.7475, "step": 20127 }, { "epoch": 0.7109572650327389, "grad_norm": 1.6333421468734741, "learning_rate": 2.0355417785095873e-06, "loss": 0.7931, "step": 20128 }, { "epoch": 0.7109925868364468, "grad_norm": 0.9573134183883667, "learning_rate": 2.035081168509347e-06, "loss": 0.5776, "step": 20129 }, { "epoch": 0.7110279086401547, "grad_norm": 1.6431455612182617, "learning_rate": 2.034620597313405e-06, "loss": 0.74, "step": 20130 }, { "epoch": 0.7110632304438627, "grad_norm": 1.699418306350708, "learning_rate": 2.034160064927792e-06, "loss": 0.7842, "step": 20131 }, { "epoch": 0.7110985522475706, "grad_norm": 1.6336264610290527, "learning_rate": 2.0336995713585354e-06, "loss": 0.772, "step": 20132 }, { "epoch": 0.7111338740512785, "grad_norm": 1.7274514436721802, "learning_rate": 2.03323911661166e-06, "loss": 0.7944, "step": 20133 }, { "epoch": 0.7111691958549863, "grad_norm": 1.7734839916229248, "learning_rate": 2.032778700693193e-06, "loss": 0.7758, "step": 20134 }, { "epoch": 0.7112045176586942, "grad_norm": 1.6704775094985962, "learning_rate": 2.0323183236091625e-06, "loss": 0.7812, "step": 20135 }, { "epoch": 0.7112398394624021, "grad_norm": 1.6404343843460083, "learning_rate": 2.0318579853655908e-06, "loss": 0.7971, "step": 20136 }, { "epoch": 0.71127516126611, "grad_norm": 1.9668101072311401, "learning_rate": 2.031397685968503e-06, "loss": 0.7846, "step": 20137 }, { "epoch": 0.7113104830698179, "grad_norm": 1.6528557538986206, "learning_rate": 2.030937425423926e-06, "loss": 0.7931, "step": 20138 }, { "epoch": 0.7113458048735258, "grad_norm": 1.6006731986999512, "learning_rate": 2.0304772037378808e-06, "loss": 0.7852, "step": 20139 }, { "epoch": 0.7113811266772337, "grad_norm": 1.6153912544250488, "learning_rate": 2.0300170209163926e-06, "loss": 0.7916, "step": 20140 }, { "epoch": 0.7114164484809417, "grad_norm": 1.5493501424789429, "learning_rate": 2.0295568769654823e-06, "loss": 0.7536, "step": 20141 }, { "epoch": 0.7114517702846496, "grad_norm": 1.7029626369476318, "learning_rate": 2.029096771891175e-06, "loss": 0.7516, "step": 20142 }, { "epoch": 0.7114870920883575, "grad_norm": 1.768899917602539, "learning_rate": 2.0286367056994883e-06, "loss": 0.7768, "step": 20143 }, { "epoch": 0.7115224138920654, "grad_norm": 5.634205341339111, "learning_rate": 2.0281766783964455e-06, "loss": 0.8179, "step": 20144 }, { "epoch": 0.7115577356957733, "grad_norm": 1.69560706615448, "learning_rate": 2.0277166899880707e-06, "loss": 0.793, "step": 20145 }, { "epoch": 0.7115930574994812, "grad_norm": 1.6613870859146118, "learning_rate": 2.0272567404803785e-06, "loss": 0.7907, "step": 20146 }, { "epoch": 0.7116283793031891, "grad_norm": 1.5979613065719604, "learning_rate": 2.0267968298793918e-06, "loss": 0.7866, "step": 20147 }, { "epoch": 0.711663701106897, "grad_norm": 1.6190093755722046, "learning_rate": 2.026336958191131e-06, "loss": 0.7949, "step": 20148 }, { "epoch": 0.7116990229106049, "grad_norm": 1.7524170875549316, "learning_rate": 2.025877125421611e-06, "loss": 0.7844, "step": 20149 }, { "epoch": 0.7117343447143128, "grad_norm": 1.6620110273361206, "learning_rate": 2.0254173315768523e-06, "loss": 0.7565, "step": 20150 }, { "epoch": 0.7117696665180208, "grad_norm": 1.451804518699646, "learning_rate": 2.024957576662874e-06, "loss": 0.7134, "step": 20151 }, { "epoch": 0.7118049883217287, "grad_norm": 1.5045844316482544, "learning_rate": 2.02449786068569e-06, "loss": 0.7864, "step": 20152 }, { "epoch": 0.7118403101254366, "grad_norm": 1.8142132759094238, "learning_rate": 2.0240381836513185e-06, "loss": 0.7906, "step": 20153 }, { "epoch": 0.7118756319291445, "grad_norm": 1.9778459072113037, "learning_rate": 2.0235785455657773e-06, "loss": 0.7564, "step": 20154 }, { "epoch": 0.7119109537328524, "grad_norm": 1.635263204574585, "learning_rate": 2.02311894643508e-06, "loss": 0.7725, "step": 20155 }, { "epoch": 0.7119462755365603, "grad_norm": 1.7602797746658325, "learning_rate": 2.0226593862652416e-06, "loss": 0.7889, "step": 20156 }, { "epoch": 0.7119815973402682, "grad_norm": 1.5614874362945557, "learning_rate": 2.022199865062279e-06, "loss": 0.7447, "step": 20157 }, { "epoch": 0.7120169191439761, "grad_norm": 1.6976909637451172, "learning_rate": 2.021740382832205e-06, "loss": 0.7855, "step": 20158 }, { "epoch": 0.712052240947684, "grad_norm": 1.82088041305542, "learning_rate": 2.0212809395810316e-06, "loss": 0.7544, "step": 20159 }, { "epoch": 0.7120875627513918, "grad_norm": 1.6711195707321167, "learning_rate": 2.020821535314773e-06, "loss": 0.7801, "step": 20160 }, { "epoch": 0.7121228845550998, "grad_norm": 1.7188245058059692, "learning_rate": 2.0203621700394437e-06, "loss": 0.766, "step": 20161 }, { "epoch": 0.7121582063588077, "grad_norm": 1.6101537942886353, "learning_rate": 2.019902843761053e-06, "loss": 0.7943, "step": 20162 }, { "epoch": 0.7121935281625156, "grad_norm": 1.6281996965408325, "learning_rate": 2.0194435564856134e-06, "loss": 0.7546, "step": 20163 }, { "epoch": 0.7122288499662235, "grad_norm": 1.5777174234390259, "learning_rate": 2.018984308219138e-06, "loss": 0.7345, "step": 20164 }, { "epoch": 0.7122641717699314, "grad_norm": 1.709155559539795, "learning_rate": 2.0185250989676335e-06, "loss": 0.7542, "step": 20165 }, { "epoch": 0.7122994935736393, "grad_norm": 1.7519886493682861, "learning_rate": 2.0180659287371125e-06, "loss": 0.7838, "step": 20166 }, { "epoch": 0.7123348153773472, "grad_norm": 1.6900137662887573, "learning_rate": 2.0176067975335862e-06, "loss": 0.763, "step": 20167 }, { "epoch": 0.7123701371810551, "grad_norm": 1.6081079244613647, "learning_rate": 2.017147705363059e-06, "loss": 0.7796, "step": 20168 }, { "epoch": 0.712405458984763, "grad_norm": 1.796126127243042, "learning_rate": 2.016688652231542e-06, "loss": 0.7609, "step": 20169 }, { "epoch": 0.712440780788471, "grad_norm": 1.6370594501495361, "learning_rate": 2.0162296381450434e-06, "loss": 0.7839, "step": 20170 }, { "epoch": 0.7124761025921789, "grad_norm": 1.9102938175201416, "learning_rate": 2.015770663109572e-06, "loss": 0.7336, "step": 20171 }, { "epoch": 0.7125114243958868, "grad_norm": 1.7210097312927246, "learning_rate": 2.0153117271311316e-06, "loss": 0.7817, "step": 20172 }, { "epoch": 0.7125467461995947, "grad_norm": 1.7433308362960815, "learning_rate": 2.01485283021573e-06, "loss": 0.7714, "step": 20173 }, { "epoch": 0.7125820680033026, "grad_norm": 1.91849684715271, "learning_rate": 2.0143939723693757e-06, "loss": 0.7864, "step": 20174 }, { "epoch": 0.7126173898070105, "grad_norm": 1.8290787935256958, "learning_rate": 2.0139351535980695e-06, "loss": 0.7927, "step": 20175 }, { "epoch": 0.7126527116107184, "grad_norm": 1.7819840908050537, "learning_rate": 2.013476373907819e-06, "loss": 0.7674, "step": 20176 }, { "epoch": 0.7126880334144263, "grad_norm": 1.943205714225769, "learning_rate": 2.013017633304632e-06, "loss": 0.7967, "step": 20177 }, { "epoch": 0.7127233552181342, "grad_norm": 1.8050775527954102, "learning_rate": 2.012558931794505e-06, "loss": 0.7481, "step": 20178 }, { "epoch": 0.7127586770218421, "grad_norm": 1.7197991609573364, "learning_rate": 2.0121002693834444e-06, "loss": 0.773, "step": 20179 }, { "epoch": 0.7127939988255501, "grad_norm": 1.7443733215332031, "learning_rate": 2.0116416460774544e-06, "loss": 0.7649, "step": 20180 }, { "epoch": 0.712829320629258, "grad_norm": 1.834320306777954, "learning_rate": 2.011183061882539e-06, "loss": 0.7919, "step": 20181 }, { "epoch": 0.7128646424329659, "grad_norm": 0.9110531210899353, "learning_rate": 2.0107245168046958e-06, "loss": 0.5655, "step": 20182 }, { "epoch": 0.7128999642366738, "grad_norm": 1.7214670181274414, "learning_rate": 2.0102660108499283e-06, "loss": 0.7684, "step": 20183 }, { "epoch": 0.7129352860403817, "grad_norm": 1.5630970001220703, "learning_rate": 2.0098075440242388e-06, "loss": 0.7799, "step": 20184 }, { "epoch": 0.7129706078440896, "grad_norm": 1.739175796508789, "learning_rate": 2.009349116333624e-06, "loss": 0.7691, "step": 20185 }, { "epoch": 0.7130059296477974, "grad_norm": 1.7734612226486206, "learning_rate": 2.0088907277840864e-06, "loss": 0.7641, "step": 20186 }, { "epoch": 0.7130412514515053, "grad_norm": 1.6507601737976074, "learning_rate": 2.008432378381626e-06, "loss": 0.7601, "step": 20187 }, { "epoch": 0.7130765732552132, "grad_norm": 1.6019482612609863, "learning_rate": 2.0079740681322392e-06, "loss": 0.7899, "step": 20188 }, { "epoch": 0.7131118950589211, "grad_norm": 1.7463507652282715, "learning_rate": 2.0075157970419246e-06, "loss": 0.7802, "step": 20189 }, { "epoch": 0.713147216862629, "grad_norm": 1.7922013998031616, "learning_rate": 2.0070575651166825e-06, "loss": 0.736, "step": 20190 }, { "epoch": 0.713182538666337, "grad_norm": 1.706475853919983, "learning_rate": 2.006599372362507e-06, "loss": 0.7709, "step": 20191 }, { "epoch": 0.7132178604700449, "grad_norm": 1.6254256963729858, "learning_rate": 2.0061412187853956e-06, "loss": 0.7451, "step": 20192 }, { "epoch": 0.7132531822737528, "grad_norm": 1.670781135559082, "learning_rate": 2.0056831043913477e-06, "loss": 0.7797, "step": 20193 }, { "epoch": 0.7132885040774607, "grad_norm": 1.5731863975524902, "learning_rate": 2.0052250291863544e-06, "loss": 0.8031, "step": 20194 }, { "epoch": 0.7133238258811686, "grad_norm": 1.6533305644989014, "learning_rate": 2.004766993176415e-06, "loss": 0.7652, "step": 20195 }, { "epoch": 0.7133591476848765, "grad_norm": 5.304126739501953, "learning_rate": 2.00430899636752e-06, "loss": 0.761, "step": 20196 }, { "epoch": 0.7133944694885844, "grad_norm": 1.6055915355682373, "learning_rate": 2.0038510387656674e-06, "loss": 0.7936, "step": 20197 }, { "epoch": 0.7134297912922923, "grad_norm": 1.6969996690750122, "learning_rate": 2.0033931203768477e-06, "loss": 0.748, "step": 20198 }, { "epoch": 0.7134651130960002, "grad_norm": 1.9418669939041138, "learning_rate": 2.0029352412070553e-06, "loss": 0.7898, "step": 20199 }, { "epoch": 0.7135004348997082, "grad_norm": 1.9403436183929443, "learning_rate": 2.002477401262285e-06, "loss": 0.8094, "step": 20200 }, { "epoch": 0.7135357567034161, "grad_norm": 1.792614221572876, "learning_rate": 2.0020196005485254e-06, "loss": 0.7692, "step": 20201 }, { "epoch": 0.713571078507124, "grad_norm": 1.8730145692825317, "learning_rate": 2.0015618390717693e-06, "loss": 0.7567, "step": 20202 }, { "epoch": 0.7136064003108319, "grad_norm": 1.721627950668335, "learning_rate": 2.0011041168380102e-06, "loss": 0.7908, "step": 20203 }, { "epoch": 0.7136417221145398, "grad_norm": 1.5963689088821411, "learning_rate": 2.000646433853235e-06, "loss": 0.734, "step": 20204 }, { "epoch": 0.7136770439182477, "grad_norm": 1.5991116762161255, "learning_rate": 2.0001887901234353e-06, "loss": 0.7383, "step": 20205 }, { "epoch": 0.7137123657219556, "grad_norm": 1.9062607288360596, "learning_rate": 1.9997311856546028e-06, "loss": 0.7673, "step": 20206 }, { "epoch": 0.7137476875256635, "grad_norm": 1.9600073099136353, "learning_rate": 1.9992736204527223e-06, "loss": 0.7473, "step": 20207 }, { "epoch": 0.7137830093293714, "grad_norm": 1.7228552103042603, "learning_rate": 1.998816094523785e-06, "loss": 0.7911, "step": 20208 }, { "epoch": 0.7138183311330794, "grad_norm": 1.7393105030059814, "learning_rate": 1.9983586078737806e-06, "loss": 0.7398, "step": 20209 }, { "epoch": 0.7138536529367873, "grad_norm": 1.6856248378753662, "learning_rate": 1.997901160508692e-06, "loss": 0.7674, "step": 20210 }, { "epoch": 0.7138889747404952, "grad_norm": 1.9134166240692139, "learning_rate": 1.9974437524345093e-06, "loss": 0.8106, "step": 20211 }, { "epoch": 0.713924296544203, "grad_norm": 1.7608065605163574, "learning_rate": 1.9969863836572196e-06, "loss": 0.7749, "step": 20212 }, { "epoch": 0.7139596183479109, "grad_norm": 1.8135663270950317, "learning_rate": 1.996529054182806e-06, "loss": 0.7843, "step": 20213 }, { "epoch": 0.7139949401516188, "grad_norm": 1.9969689846038818, "learning_rate": 1.9960717640172573e-06, "loss": 0.7628, "step": 20214 }, { "epoch": 0.7140302619553267, "grad_norm": 1.6254409551620483, "learning_rate": 1.995614513166555e-06, "loss": 0.7518, "step": 20215 }, { "epoch": 0.7140655837590346, "grad_norm": 1.6093512773513794, "learning_rate": 1.995157301636687e-06, "loss": 0.7654, "step": 20216 }, { "epoch": 0.7141009055627425, "grad_norm": 1.809768557548523, "learning_rate": 1.994700129433633e-06, "loss": 0.788, "step": 20217 }, { "epoch": 0.7141362273664504, "grad_norm": 1.6259050369262695, "learning_rate": 1.99424299656338e-06, "loss": 0.7574, "step": 20218 }, { "epoch": 0.7141715491701583, "grad_norm": 1.8053483963012695, "learning_rate": 1.9937859030319108e-06, "loss": 0.822, "step": 20219 }, { "epoch": 0.7142068709738663, "grad_norm": 1.6183704137802124, "learning_rate": 1.9933288488452047e-06, "loss": 0.7628, "step": 20220 }, { "epoch": 0.7142421927775742, "grad_norm": 1.5937538146972656, "learning_rate": 1.992871834009245e-06, "loss": 0.7477, "step": 20221 }, { "epoch": 0.7142775145812821, "grad_norm": 1.6520594358444214, "learning_rate": 1.9924148585300157e-06, "loss": 0.7661, "step": 20222 }, { "epoch": 0.71431283638499, "grad_norm": 1.6776432991027832, "learning_rate": 1.9919579224134937e-06, "loss": 0.7741, "step": 20223 }, { "epoch": 0.7143481581886979, "grad_norm": 1.9890764951705933, "learning_rate": 1.991501025665661e-06, "loss": 0.7644, "step": 20224 }, { "epoch": 0.7143834799924058, "grad_norm": 1.5246391296386719, "learning_rate": 1.9910441682924993e-06, "loss": 0.7868, "step": 20225 }, { "epoch": 0.7144188017961137, "grad_norm": 1.6920222043991089, "learning_rate": 1.9905873502999836e-06, "loss": 0.7502, "step": 20226 }, { "epoch": 0.7144541235998216, "grad_norm": 1.998947262763977, "learning_rate": 1.990130571694095e-06, "loss": 0.7962, "step": 20227 }, { "epoch": 0.7144894454035295, "grad_norm": 1.0168192386627197, "learning_rate": 1.9896738324808116e-06, "loss": 0.5913, "step": 20228 }, { "epoch": 0.7145247672072375, "grad_norm": 1.6955444812774658, "learning_rate": 1.9892171326661137e-06, "loss": 0.7914, "step": 20229 }, { "epoch": 0.7145600890109454, "grad_norm": 1.5872060060501099, "learning_rate": 1.9887604722559735e-06, "loss": 0.7442, "step": 20230 }, { "epoch": 0.7145954108146533, "grad_norm": 1.6501270532608032, "learning_rate": 1.98830385125637e-06, "loss": 0.8191, "step": 20231 }, { "epoch": 0.7146307326183612, "grad_norm": 4.304833889007568, "learning_rate": 1.9878472696732837e-06, "loss": 0.7755, "step": 20232 }, { "epoch": 0.7146660544220691, "grad_norm": 1.952122449874878, "learning_rate": 1.9873907275126825e-06, "loss": 0.7245, "step": 20233 }, { "epoch": 0.714701376225777, "grad_norm": 1.8760106563568115, "learning_rate": 1.986934224780545e-06, "loss": 0.7729, "step": 20234 }, { "epoch": 0.7147366980294849, "grad_norm": 1.7342092990875244, "learning_rate": 1.986477761482848e-06, "loss": 0.7685, "step": 20235 }, { "epoch": 0.7147720198331928, "grad_norm": 1.6186579465866089, "learning_rate": 1.9860213376255617e-06, "loss": 0.7795, "step": 20236 }, { "epoch": 0.7148073416369007, "grad_norm": 1.8920499086380005, "learning_rate": 1.9855649532146614e-06, "loss": 0.7764, "step": 20237 }, { "epoch": 0.7148426634406086, "grad_norm": 1.857970952987671, "learning_rate": 1.9851086082561205e-06, "loss": 0.7792, "step": 20238 }, { "epoch": 0.7148779852443164, "grad_norm": 1.7764787673950195, "learning_rate": 1.9846523027559128e-06, "loss": 0.7823, "step": 20239 }, { "epoch": 0.7149133070480244, "grad_norm": 1.6559410095214844, "learning_rate": 1.9841960367200077e-06, "loss": 0.7807, "step": 20240 }, { "epoch": 0.7149486288517323, "grad_norm": 1.7438982725143433, "learning_rate": 1.9837398101543776e-06, "loss": 0.8009, "step": 20241 }, { "epoch": 0.7149839506554402, "grad_norm": 1.6652476787567139, "learning_rate": 1.983283623064996e-06, "loss": 0.7873, "step": 20242 }, { "epoch": 0.7150192724591481, "grad_norm": 1.583248257637024, "learning_rate": 1.982827475457829e-06, "loss": 0.7609, "step": 20243 }, { "epoch": 0.715054594262856, "grad_norm": 1.7157459259033203, "learning_rate": 1.982371367338849e-06, "loss": 0.8205, "step": 20244 }, { "epoch": 0.7150899160665639, "grad_norm": 1.673444151878357, "learning_rate": 1.981915298714027e-06, "loss": 0.7531, "step": 20245 }, { "epoch": 0.7151252378702718, "grad_norm": 1.7888617515563965, "learning_rate": 1.9814592695893286e-06, "loss": 0.7522, "step": 20246 }, { "epoch": 0.7151605596739797, "grad_norm": 1.7136222124099731, "learning_rate": 1.9810032799707242e-06, "loss": 0.7771, "step": 20247 }, { "epoch": 0.7151958814776876, "grad_norm": 1.6309700012207031, "learning_rate": 1.9805473298641824e-06, "loss": 0.7678, "step": 20248 }, { "epoch": 0.7152312032813956, "grad_norm": 1.8016526699066162, "learning_rate": 1.9800914192756684e-06, "loss": 0.745, "step": 20249 }, { "epoch": 0.7152665250851035, "grad_norm": 1.6750792264938354, "learning_rate": 1.9796355482111516e-06, "loss": 0.7645, "step": 20250 }, { "epoch": 0.7153018468888114, "grad_norm": 1.8523582220077515, "learning_rate": 1.9791797166765957e-06, "loss": 0.7467, "step": 20251 }, { "epoch": 0.7153371686925193, "grad_norm": 1.552415132522583, "learning_rate": 1.978723924677969e-06, "loss": 0.7512, "step": 20252 }, { "epoch": 0.7153724904962272, "grad_norm": 1.7125427722930908, "learning_rate": 1.9782681722212343e-06, "loss": 0.779, "step": 20253 }, { "epoch": 0.7154078122999351, "grad_norm": 1.6447935104370117, "learning_rate": 1.9778124593123577e-06, "loss": 0.7783, "step": 20254 }, { "epoch": 0.715443134103643, "grad_norm": 1.6579508781433105, "learning_rate": 1.977356785957305e-06, "loss": 0.7457, "step": 20255 }, { "epoch": 0.7154784559073509, "grad_norm": 1.729666829109192, "learning_rate": 1.976901152162037e-06, "loss": 0.8048, "step": 20256 }, { "epoch": 0.7155137777110588, "grad_norm": 1.7723920345306396, "learning_rate": 1.9764455579325183e-06, "loss": 0.7938, "step": 20257 }, { "epoch": 0.7155490995147668, "grad_norm": 1.6049599647521973, "learning_rate": 1.975990003274713e-06, "loss": 0.747, "step": 20258 }, { "epoch": 0.7155844213184747, "grad_norm": 1.7705923318862915, "learning_rate": 1.9755344881945814e-06, "loss": 0.7695, "step": 20259 }, { "epoch": 0.7156197431221826, "grad_norm": 1.56842839717865, "learning_rate": 1.9750790126980852e-06, "loss": 0.753, "step": 20260 }, { "epoch": 0.7156550649258905, "grad_norm": 1.7161142826080322, "learning_rate": 1.974623576791188e-06, "loss": 0.7623, "step": 20261 }, { "epoch": 0.7156903867295984, "grad_norm": 1.6240832805633545, "learning_rate": 1.9741681804798474e-06, "loss": 0.7715, "step": 20262 }, { "epoch": 0.7157257085333063, "grad_norm": 1.6181303262710571, "learning_rate": 1.9737128237700244e-06, "loss": 0.7479, "step": 20263 }, { "epoch": 0.7157610303370142, "grad_norm": 1.94233238697052, "learning_rate": 1.9732575066676814e-06, "loss": 0.7848, "step": 20264 }, { "epoch": 0.715796352140722, "grad_norm": 1.7167739868164062, "learning_rate": 1.972802229178773e-06, "loss": 0.81, "step": 20265 }, { "epoch": 0.7158316739444299, "grad_norm": 1.6792194843292236, "learning_rate": 1.97234699130926e-06, "loss": 0.7617, "step": 20266 }, { "epoch": 0.7158669957481378, "grad_norm": 1.5550585985183716, "learning_rate": 1.971891793065102e-06, "loss": 0.7143, "step": 20267 }, { "epoch": 0.7159023175518457, "grad_norm": 1.668211579322815, "learning_rate": 1.9714366344522533e-06, "loss": 0.7765, "step": 20268 }, { "epoch": 0.7159376393555537, "grad_norm": 1.739550232887268, "learning_rate": 1.970981515476675e-06, "loss": 0.7958, "step": 20269 }, { "epoch": 0.7159729611592616, "grad_norm": 1.518194556236267, "learning_rate": 1.970526436144319e-06, "loss": 0.7437, "step": 20270 }, { "epoch": 0.7160082829629695, "grad_norm": 2.1469571590423584, "learning_rate": 1.970071396461145e-06, "loss": 0.7772, "step": 20271 }, { "epoch": 0.7160436047666774, "grad_norm": 2.142725706100464, "learning_rate": 1.9696163964331057e-06, "loss": 0.7814, "step": 20272 }, { "epoch": 0.7160789265703853, "grad_norm": 1.7069860696792603, "learning_rate": 1.969161436066157e-06, "loss": 0.7744, "step": 20273 }, { "epoch": 0.7161142483740932, "grad_norm": 1.6312804222106934, "learning_rate": 1.968706515366256e-06, "loss": 0.7351, "step": 20274 }, { "epoch": 0.7161495701778011, "grad_norm": 1.656385064125061, "learning_rate": 1.9682516343393523e-06, "loss": 0.7632, "step": 20275 }, { "epoch": 0.716184891981509, "grad_norm": 1.561102271080017, "learning_rate": 1.967796792991401e-06, "loss": 0.7457, "step": 20276 }, { "epoch": 0.7162202137852169, "grad_norm": 1.7291020154953003, "learning_rate": 1.967341991328357e-06, "loss": 0.7494, "step": 20277 }, { "epoch": 0.7162555355889249, "grad_norm": 1.5631623268127441, "learning_rate": 1.9668872293561696e-06, "loss": 0.7538, "step": 20278 }, { "epoch": 0.7162908573926328, "grad_norm": 1.7652167081832886, "learning_rate": 1.9664325070807915e-06, "loss": 0.7818, "step": 20279 }, { "epoch": 0.7163261791963407, "grad_norm": 1.6269683837890625, "learning_rate": 1.965977824508177e-06, "loss": 0.7713, "step": 20280 }, { "epoch": 0.7163615010000486, "grad_norm": 1.6079180240631104, "learning_rate": 1.965523181644272e-06, "loss": 0.7222, "step": 20281 }, { "epoch": 0.7163968228037565, "grad_norm": 1.8017696142196655, "learning_rate": 1.9650685784950295e-06, "loss": 0.7791, "step": 20282 }, { "epoch": 0.7164321446074644, "grad_norm": 2.1357133388519287, "learning_rate": 1.9646140150664005e-06, "loss": 0.7868, "step": 20283 }, { "epoch": 0.7164674664111723, "grad_norm": 2.8857386112213135, "learning_rate": 1.9641594913643308e-06, "loss": 0.7856, "step": 20284 }, { "epoch": 0.7165027882148802, "grad_norm": 1.7375760078430176, "learning_rate": 1.9637050073947716e-06, "loss": 0.7859, "step": 20285 }, { "epoch": 0.7165381100185881, "grad_norm": 1.8200887441635132, "learning_rate": 1.96325056316367e-06, "loss": 0.7984, "step": 20286 }, { "epoch": 0.716573431822296, "grad_norm": 1.7635103464126587, "learning_rate": 1.962796158676978e-06, "loss": 0.7981, "step": 20287 }, { "epoch": 0.716608753626004, "grad_norm": 1.5616940259933472, "learning_rate": 1.962341793940635e-06, "loss": 0.7435, "step": 20288 }, { "epoch": 0.7166440754297119, "grad_norm": 1.5965051651000977, "learning_rate": 1.9618874689605918e-06, "loss": 0.7772, "step": 20289 }, { "epoch": 0.7166793972334198, "grad_norm": 1.6515611410140991, "learning_rate": 1.9614331837427964e-06, "loss": 0.7616, "step": 20290 }, { "epoch": 0.7167147190371276, "grad_norm": 1.6248536109924316, "learning_rate": 1.96097893829319e-06, "loss": 0.768, "step": 20291 }, { "epoch": 0.7167500408408355, "grad_norm": 1.8026254177093506, "learning_rate": 1.96052473261772e-06, "loss": 0.7908, "step": 20292 }, { "epoch": 0.7167853626445434, "grad_norm": 1.5824817419052124, "learning_rate": 1.960070566722333e-06, "loss": 0.7457, "step": 20293 }, { "epoch": 0.7168206844482513, "grad_norm": 0.9997862577438354, "learning_rate": 1.959616440612969e-06, "loss": 0.5749, "step": 20294 }, { "epoch": 0.7168560062519592, "grad_norm": 1.6878037452697754, "learning_rate": 1.959162354295574e-06, "loss": 0.7893, "step": 20295 }, { "epoch": 0.7168913280556671, "grad_norm": 1.7265422344207764, "learning_rate": 1.95870830777609e-06, "loss": 0.7526, "step": 20296 }, { "epoch": 0.716926649859375, "grad_norm": 1.7327646017074585, "learning_rate": 1.9582543010604617e-06, "loss": 0.7595, "step": 20297 }, { "epoch": 0.716961971663083, "grad_norm": 1.49692702293396, "learning_rate": 1.9578003341546274e-06, "loss": 0.7357, "step": 20298 }, { "epoch": 0.7169972934667909, "grad_norm": 1.6546176671981812, "learning_rate": 1.957346407064531e-06, "loss": 0.7325, "step": 20299 }, { "epoch": 0.7170326152704988, "grad_norm": 1.7408078908920288, "learning_rate": 1.9568925197961143e-06, "loss": 0.7661, "step": 20300 }, { "epoch": 0.7170679370742067, "grad_norm": 1.7412029504776, "learning_rate": 1.9564386723553148e-06, "loss": 0.7582, "step": 20301 }, { "epoch": 0.7171032588779146, "grad_norm": 1.7297167778015137, "learning_rate": 1.9559848647480734e-06, "loss": 0.7519, "step": 20302 }, { "epoch": 0.7171385806816225, "grad_norm": 1.8267161846160889, "learning_rate": 1.955531096980332e-06, "loss": 0.7748, "step": 20303 }, { "epoch": 0.7171739024853304, "grad_norm": 1.5956592559814453, "learning_rate": 1.9550773690580257e-06, "loss": 0.7242, "step": 20304 }, { "epoch": 0.7172092242890383, "grad_norm": 1.6262489557266235, "learning_rate": 1.954623680987094e-06, "loss": 0.7747, "step": 20305 }, { "epoch": 0.7172445460927462, "grad_norm": 1.6791719198226929, "learning_rate": 1.954170032773477e-06, "loss": 0.7589, "step": 20306 }, { "epoch": 0.7172798678964541, "grad_norm": 1.6431875228881836, "learning_rate": 1.95371642442311e-06, "loss": 0.7778, "step": 20307 }, { "epoch": 0.7173151897001621, "grad_norm": 1.7022795677185059, "learning_rate": 1.9532628559419274e-06, "loss": 0.7838, "step": 20308 }, { "epoch": 0.71735051150387, "grad_norm": 2.2260894775390625, "learning_rate": 1.952809327335869e-06, "loss": 0.7815, "step": 20309 }, { "epoch": 0.7173858333075779, "grad_norm": 1.8095005750656128, "learning_rate": 1.9523558386108702e-06, "loss": 0.7634, "step": 20310 }, { "epoch": 0.7174211551112858, "grad_norm": 1.7166398763656616, "learning_rate": 1.9519023897728638e-06, "loss": 0.8138, "step": 20311 }, { "epoch": 0.7174564769149937, "grad_norm": 1.6996989250183105, "learning_rate": 1.9514489808277865e-06, "loss": 0.7693, "step": 20312 }, { "epoch": 0.7174917987187016, "grad_norm": 1.7525932788848877, "learning_rate": 1.9509956117815733e-06, "loss": 0.7882, "step": 20313 }, { "epoch": 0.7175271205224095, "grad_norm": 1.6703459024429321, "learning_rate": 1.9505422826401545e-06, "loss": 0.7882, "step": 20314 }, { "epoch": 0.7175624423261174, "grad_norm": 1.697066307067871, "learning_rate": 1.9500889934094646e-06, "loss": 0.7767, "step": 20315 }, { "epoch": 0.7175977641298253, "grad_norm": 3.0396735668182373, "learning_rate": 1.949635744095439e-06, "loss": 0.7695, "step": 20316 }, { "epoch": 0.7176330859335331, "grad_norm": 1.7014529705047607, "learning_rate": 1.9491825347040054e-06, "loss": 0.734, "step": 20317 }, { "epoch": 0.717668407737241, "grad_norm": 1.7121931314468384, "learning_rate": 1.948729365241097e-06, "loss": 0.7705, "step": 20318 }, { "epoch": 0.717703729540949, "grad_norm": 1.5658190250396729, "learning_rate": 1.9482762357126477e-06, "loss": 0.7869, "step": 20319 }, { "epoch": 0.7177390513446569, "grad_norm": 1.772907018661499, "learning_rate": 1.947823146124583e-06, "loss": 0.7948, "step": 20320 }, { "epoch": 0.7177743731483648, "grad_norm": 1.757951259613037, "learning_rate": 1.947370096482835e-06, "loss": 0.7671, "step": 20321 }, { "epoch": 0.7178096949520727, "grad_norm": 1.6919912099838257, "learning_rate": 1.9469170867933356e-06, "loss": 0.7609, "step": 20322 }, { "epoch": 0.7178450167557806, "grad_norm": 1.6247862577438354, "learning_rate": 1.9464641170620097e-06, "loss": 0.7529, "step": 20323 }, { "epoch": 0.7178803385594885, "grad_norm": 1.6103577613830566, "learning_rate": 1.946011187294789e-06, "loss": 0.7976, "step": 20324 }, { "epoch": 0.7179156603631964, "grad_norm": 1.6111432313919067, "learning_rate": 1.945558297497598e-06, "loss": 0.7619, "step": 20325 }, { "epoch": 0.7179509821669043, "grad_norm": 1.8028028011322021, "learning_rate": 1.9451054476763674e-06, "loss": 0.7778, "step": 20326 }, { "epoch": 0.7179863039706122, "grad_norm": 1.521667718887329, "learning_rate": 1.9446526378370213e-06, "loss": 0.7406, "step": 20327 }, { "epoch": 0.7180216257743202, "grad_norm": 1.6436151266098022, "learning_rate": 1.944199867985487e-06, "loss": 0.7425, "step": 20328 }, { "epoch": 0.7180569475780281, "grad_norm": 1.8213468790054321, "learning_rate": 1.9437471381276913e-06, "loss": 0.764, "step": 20329 }, { "epoch": 0.718092269381736, "grad_norm": 1.6446168422698975, "learning_rate": 1.9432944482695575e-06, "loss": 0.7535, "step": 20330 }, { "epoch": 0.7181275911854439, "grad_norm": 1.7206571102142334, "learning_rate": 1.9428417984170113e-06, "loss": 0.786, "step": 20331 }, { "epoch": 0.7181629129891518, "grad_norm": 2.1045730113983154, "learning_rate": 1.942389188575979e-06, "loss": 0.7944, "step": 20332 }, { "epoch": 0.7181982347928597, "grad_norm": 1.798301100730896, "learning_rate": 1.9419366187523796e-06, "loss": 0.7731, "step": 20333 }, { "epoch": 0.7182335565965676, "grad_norm": 1.7656656503677368, "learning_rate": 1.94148408895214e-06, "loss": 0.7659, "step": 20334 }, { "epoch": 0.7182688784002755, "grad_norm": 1.5720906257629395, "learning_rate": 1.941031599181183e-06, "loss": 0.7523, "step": 20335 }, { "epoch": 0.7183042002039834, "grad_norm": 1.9924958944320679, "learning_rate": 1.9405791494454273e-06, "loss": 0.7797, "step": 20336 }, { "epoch": 0.7183395220076914, "grad_norm": 1.7086420059204102, "learning_rate": 1.940126739750797e-06, "loss": 0.7697, "step": 20337 }, { "epoch": 0.7183748438113993, "grad_norm": 1.7663205862045288, "learning_rate": 1.9396743701032137e-06, "loss": 0.7627, "step": 20338 }, { "epoch": 0.7184101656151072, "grad_norm": 1.5951675176620483, "learning_rate": 1.939222040508596e-06, "loss": 0.7739, "step": 20339 }, { "epoch": 0.7184454874188151, "grad_norm": 1.6209462881088257, "learning_rate": 1.938769750972865e-06, "loss": 0.762, "step": 20340 }, { "epoch": 0.718480809222523, "grad_norm": 1.8270410299301147, "learning_rate": 1.938317501501941e-06, "loss": 0.7536, "step": 20341 }, { "epoch": 0.7185161310262309, "grad_norm": 1.7767372131347656, "learning_rate": 1.937865292101741e-06, "loss": 0.7683, "step": 20342 }, { "epoch": 0.7185514528299387, "grad_norm": 1.8859869241714478, "learning_rate": 1.9374131227781864e-06, "loss": 0.7697, "step": 20343 }, { "epoch": 0.7185867746336466, "grad_norm": 1.8323094844818115, "learning_rate": 1.936960993537191e-06, "loss": 0.787, "step": 20344 }, { "epoch": 0.7186220964373545, "grad_norm": 1.6228731870651245, "learning_rate": 1.936508904384676e-06, "loss": 0.7655, "step": 20345 }, { "epoch": 0.7186574182410624, "grad_norm": 2.6928179264068604, "learning_rate": 1.9360568553265548e-06, "loss": 0.7501, "step": 20346 }, { "epoch": 0.7186927400447704, "grad_norm": 1.6742475032806396, "learning_rate": 1.9356048463687454e-06, "loss": 0.7756, "step": 20347 }, { "epoch": 0.7187280618484783, "grad_norm": 0.7921969890594482, "learning_rate": 1.935152877517166e-06, "loss": 0.5635, "step": 20348 }, { "epoch": 0.7187633836521862, "grad_norm": 1.8518280982971191, "learning_rate": 1.934700948777727e-06, "loss": 0.7595, "step": 20349 }, { "epoch": 0.7187987054558941, "grad_norm": 1.701163649559021, "learning_rate": 1.9342490601563463e-06, "loss": 0.7872, "step": 20350 }, { "epoch": 0.718834027259602, "grad_norm": 1.6656291484832764, "learning_rate": 1.9337972116589393e-06, "loss": 0.7307, "step": 20351 }, { "epoch": 0.7188693490633099, "grad_norm": 2.0557563304901123, "learning_rate": 1.933345403291417e-06, "loss": 0.7811, "step": 20352 }, { "epoch": 0.7189046708670178, "grad_norm": 2.21682071685791, "learning_rate": 1.932893635059692e-06, "loss": 0.7841, "step": 20353 }, { "epoch": 0.7189399926707257, "grad_norm": 1.5689371824264526, "learning_rate": 1.93244190696968e-06, "loss": 0.7251, "step": 20354 }, { "epoch": 0.7189753144744336, "grad_norm": 1.7214056253433228, "learning_rate": 1.9319902190272928e-06, "loss": 0.7994, "step": 20355 }, { "epoch": 0.7190106362781415, "grad_norm": 1.7676990032196045, "learning_rate": 1.9315385712384392e-06, "loss": 0.7902, "step": 20356 }, { "epoch": 0.7190459580818495, "grad_norm": 1.8210722208023071, "learning_rate": 1.9310869636090318e-06, "loss": 0.765, "step": 20357 }, { "epoch": 0.7190812798855574, "grad_norm": 1.8053271770477295, "learning_rate": 1.9306353961449835e-06, "loss": 0.7859, "step": 20358 }, { "epoch": 0.7191166016892653, "grad_norm": 2.169529438018799, "learning_rate": 1.9301838688522e-06, "loss": 0.8018, "step": 20359 }, { "epoch": 0.7191519234929732, "grad_norm": 1.828719139099121, "learning_rate": 1.9297323817365934e-06, "loss": 0.7682, "step": 20360 }, { "epoch": 0.7191872452966811, "grad_norm": 1.654618740081787, "learning_rate": 1.9292809348040752e-06, "loss": 0.7974, "step": 20361 }, { "epoch": 0.719222567100389, "grad_norm": 1.7226084470748901, "learning_rate": 1.928829528060548e-06, "loss": 0.7741, "step": 20362 }, { "epoch": 0.7192578889040969, "grad_norm": 1.6654778718948364, "learning_rate": 1.9283781615119216e-06, "loss": 0.7636, "step": 20363 }, { "epoch": 0.7192932107078048, "grad_norm": 1.9795434474945068, "learning_rate": 1.9279268351641057e-06, "loss": 0.7298, "step": 20364 }, { "epoch": 0.7193285325115127, "grad_norm": 1.5556539297103882, "learning_rate": 1.927475549023007e-06, "loss": 0.7743, "step": 20365 }, { "epoch": 0.7193638543152207, "grad_norm": 1.5909613370895386, "learning_rate": 1.927024303094529e-06, "loss": 0.7447, "step": 20366 }, { "epoch": 0.7193991761189286, "grad_norm": 1.8548808097839355, "learning_rate": 1.9265730973845793e-06, "loss": 0.7728, "step": 20367 }, { "epoch": 0.7194344979226365, "grad_norm": 1.594985008239746, "learning_rate": 1.926121931899065e-06, "loss": 0.7873, "step": 20368 }, { "epoch": 0.7194698197263443, "grad_norm": 1.6000046730041504, "learning_rate": 1.925670806643887e-06, "loss": 0.7661, "step": 20369 }, { "epoch": 0.7195051415300522, "grad_norm": 1.747389793395996, "learning_rate": 1.925219721624952e-06, "loss": 0.7736, "step": 20370 }, { "epoch": 0.7195404633337601, "grad_norm": 1.9537039995193481, "learning_rate": 1.924768676848165e-06, "loss": 0.8116, "step": 20371 }, { "epoch": 0.719575785137468, "grad_norm": 1.6405279636383057, "learning_rate": 1.924317672319425e-06, "loss": 0.7651, "step": 20372 }, { "epoch": 0.7196111069411759, "grad_norm": 2.02197527885437, "learning_rate": 1.923866708044638e-06, "loss": 0.7798, "step": 20373 }, { "epoch": 0.7196464287448838, "grad_norm": 1.5716884136199951, "learning_rate": 1.923415784029707e-06, "loss": 0.7594, "step": 20374 }, { "epoch": 0.7196817505485917, "grad_norm": 1.7647589445114136, "learning_rate": 1.9229649002805295e-06, "loss": 0.7777, "step": 20375 }, { "epoch": 0.7197170723522996, "grad_norm": 1.667967677116394, "learning_rate": 1.9225140568030092e-06, "loss": 0.8065, "step": 20376 }, { "epoch": 0.7197523941560076, "grad_norm": 1.798079490661621, "learning_rate": 1.9220632536030487e-06, "loss": 0.7688, "step": 20377 }, { "epoch": 0.7197877159597155, "grad_norm": 1.6475260257720947, "learning_rate": 1.9216124906865436e-06, "loss": 0.7749, "step": 20378 }, { "epoch": 0.7198230377634234, "grad_norm": 1.5329818725585938, "learning_rate": 1.9211617680593954e-06, "loss": 0.7395, "step": 20379 }, { "epoch": 0.7198583595671313, "grad_norm": 1.6579645872116089, "learning_rate": 1.920711085727505e-06, "loss": 0.7494, "step": 20380 }, { "epoch": 0.7198936813708392, "grad_norm": 2.043891668319702, "learning_rate": 1.920260443696769e-06, "loss": 0.7755, "step": 20381 }, { "epoch": 0.7199290031745471, "grad_norm": 1.5326536893844604, "learning_rate": 1.9198098419730836e-06, "loss": 0.7391, "step": 20382 }, { "epoch": 0.719964324978255, "grad_norm": 1.761171817779541, "learning_rate": 1.9193592805623484e-06, "loss": 0.7744, "step": 20383 }, { "epoch": 0.7199996467819629, "grad_norm": 1.724955677986145, "learning_rate": 1.9189087594704612e-06, "loss": 0.7629, "step": 20384 }, { "epoch": 0.7200349685856708, "grad_norm": 1.7090532779693604, "learning_rate": 1.9184582787033156e-06, "loss": 0.7815, "step": 20385 }, { "epoch": 0.7200702903893788, "grad_norm": 1.6917612552642822, "learning_rate": 1.918007838266809e-06, "loss": 0.7536, "step": 20386 }, { "epoch": 0.7201056121930867, "grad_norm": 1.7080353498458862, "learning_rate": 1.917557438166837e-06, "loss": 0.7717, "step": 20387 }, { "epoch": 0.7201409339967946, "grad_norm": 2.029963731765747, "learning_rate": 1.9171070784092934e-06, "loss": 0.7666, "step": 20388 }, { "epoch": 0.7201762558005025, "grad_norm": 1.9989614486694336, "learning_rate": 1.916656759000072e-06, "loss": 0.7607, "step": 20389 }, { "epoch": 0.7202115776042104, "grad_norm": 1.7195428609848022, "learning_rate": 1.9162064799450697e-06, "loss": 0.7603, "step": 20390 }, { "epoch": 0.7202468994079183, "grad_norm": 1.5979083776474, "learning_rate": 1.9157562412501753e-06, "loss": 0.7654, "step": 20391 }, { "epoch": 0.7202822212116262, "grad_norm": 1.6538043022155762, "learning_rate": 1.9153060429212832e-06, "loss": 0.7849, "step": 20392 }, { "epoch": 0.7203175430153341, "grad_norm": 1.8858243227005005, "learning_rate": 1.9148558849642874e-06, "loss": 0.7741, "step": 20393 }, { "epoch": 0.720352864819042, "grad_norm": 1.5302789211273193, "learning_rate": 1.9144057673850765e-06, "loss": 0.7595, "step": 20394 }, { "epoch": 0.7203881866227498, "grad_norm": 1.769710898399353, "learning_rate": 1.913955690189543e-06, "loss": 0.7701, "step": 20395 }, { "epoch": 0.7204235084264577, "grad_norm": 1.6489670276641846, "learning_rate": 1.9135056533835785e-06, "loss": 0.7401, "step": 20396 }, { "epoch": 0.7204588302301657, "grad_norm": 1.703262209892273, "learning_rate": 1.91305565697307e-06, "loss": 0.7425, "step": 20397 }, { "epoch": 0.7204941520338736, "grad_norm": 1.8246408700942993, "learning_rate": 1.9126057009639117e-06, "loss": 0.7859, "step": 20398 }, { "epoch": 0.7205294738375815, "grad_norm": 1.7556430101394653, "learning_rate": 1.9121557853619864e-06, "loss": 0.8079, "step": 20399 }, { "epoch": 0.7205647956412894, "grad_norm": 1.7408456802368164, "learning_rate": 1.9117059101731885e-06, "loss": 0.8062, "step": 20400 }, { "epoch": 0.7206001174449973, "grad_norm": 1.55873703956604, "learning_rate": 1.911256075403401e-06, "loss": 0.7363, "step": 20401 }, { "epoch": 0.7206354392487052, "grad_norm": 1.7028728723526, "learning_rate": 1.9108062810585133e-06, "loss": 0.7861, "step": 20402 }, { "epoch": 0.7206707610524131, "grad_norm": 1.8042024374008179, "learning_rate": 1.910356527144414e-06, "loss": 0.7796, "step": 20403 }, { "epoch": 0.720706082856121, "grad_norm": 1.7655118703842163, "learning_rate": 1.909906813666986e-06, "loss": 0.7748, "step": 20404 }, { "epoch": 0.7207414046598289, "grad_norm": 1.565943717956543, "learning_rate": 1.9094571406321166e-06, "loss": 0.7697, "step": 20405 }, { "epoch": 0.7207767264635369, "grad_norm": 1.7629845142364502, "learning_rate": 1.909007508045693e-06, "loss": 0.7164, "step": 20406 }, { "epoch": 0.7208120482672448, "grad_norm": 1.9155644178390503, "learning_rate": 1.9085579159135968e-06, "loss": 0.7491, "step": 20407 }, { "epoch": 0.7208473700709527, "grad_norm": 3.8882040977478027, "learning_rate": 1.908108364241713e-06, "loss": 0.8264, "step": 20408 }, { "epoch": 0.7208826918746606, "grad_norm": 1.7633527517318726, "learning_rate": 1.9076588530359274e-06, "loss": 0.771, "step": 20409 }, { "epoch": 0.7209180136783685, "grad_norm": 1.9276992082595825, "learning_rate": 1.9072093823021204e-06, "loss": 0.7618, "step": 20410 }, { "epoch": 0.7209533354820764, "grad_norm": 1.7097444534301758, "learning_rate": 1.9067599520461748e-06, "loss": 0.7619, "step": 20411 }, { "epoch": 0.7209886572857843, "grad_norm": 1.726583480834961, "learning_rate": 1.9063105622739741e-06, "loss": 0.8103, "step": 20412 }, { "epoch": 0.7210239790894922, "grad_norm": 1.7448511123657227, "learning_rate": 1.9058612129914006e-06, "loss": 0.775, "step": 20413 }, { "epoch": 0.7210593008932001, "grad_norm": 1.5714218616485596, "learning_rate": 1.9054119042043323e-06, "loss": 0.7721, "step": 20414 }, { "epoch": 0.721094622696908, "grad_norm": 1.5731163024902344, "learning_rate": 1.9049626359186507e-06, "loss": 0.7955, "step": 20415 }, { "epoch": 0.721129944500616, "grad_norm": 1.8663288354873657, "learning_rate": 1.9045134081402389e-06, "loss": 0.7722, "step": 20416 }, { "epoch": 0.7211652663043239, "grad_norm": 3.1612777709960938, "learning_rate": 1.9040642208749733e-06, "loss": 0.7505, "step": 20417 }, { "epoch": 0.7212005881080318, "grad_norm": 2.036369800567627, "learning_rate": 1.9036150741287313e-06, "loss": 0.7211, "step": 20418 }, { "epoch": 0.7212359099117397, "grad_norm": 1.7689825296401978, "learning_rate": 1.9031659679073954e-06, "loss": 0.7853, "step": 20419 }, { "epoch": 0.7212712317154476, "grad_norm": 1.9029258489608765, "learning_rate": 1.902716902216839e-06, "loss": 0.7948, "step": 20420 }, { "epoch": 0.7213065535191554, "grad_norm": 1.7358028888702393, "learning_rate": 1.9022678770629416e-06, "loss": 0.7951, "step": 20421 }, { "epoch": 0.7213418753228633, "grad_norm": 1.864172101020813, "learning_rate": 1.9018188924515802e-06, "loss": 0.7459, "step": 20422 }, { "epoch": 0.7213771971265712, "grad_norm": 1.7967755794525146, "learning_rate": 1.9013699483886322e-06, "loss": 0.82, "step": 20423 }, { "epoch": 0.7214125189302791, "grad_norm": 1.6440939903259277, "learning_rate": 1.9009210448799702e-06, "loss": 0.7585, "step": 20424 }, { "epoch": 0.721447840733987, "grad_norm": 1.6672359704971313, "learning_rate": 1.9004721819314714e-06, "loss": 0.7836, "step": 20425 }, { "epoch": 0.721483162537695, "grad_norm": 1.8015190362930298, "learning_rate": 1.9000233595490113e-06, "loss": 0.8073, "step": 20426 }, { "epoch": 0.7215184843414029, "grad_norm": 1.8158379793167114, "learning_rate": 1.899574577738461e-06, "loss": 0.7982, "step": 20427 }, { "epoch": 0.7215538061451108, "grad_norm": 1.7684948444366455, "learning_rate": 1.899125836505696e-06, "loss": 0.7746, "step": 20428 }, { "epoch": 0.7215891279488187, "grad_norm": 1.6783370971679688, "learning_rate": 1.898677135856591e-06, "loss": 0.7906, "step": 20429 }, { "epoch": 0.7216244497525266, "grad_norm": 1.8435412645339966, "learning_rate": 1.8982284757970143e-06, "loss": 0.7667, "step": 20430 }, { "epoch": 0.7216597715562345, "grad_norm": 1.92830228805542, "learning_rate": 1.8977798563328408e-06, "loss": 0.7977, "step": 20431 }, { "epoch": 0.7216950933599424, "grad_norm": 1.5184946060180664, "learning_rate": 1.8973312774699427e-06, "loss": 0.7521, "step": 20432 }, { "epoch": 0.7217304151636503, "grad_norm": 2.0913212299346924, "learning_rate": 1.8968827392141881e-06, "loss": 0.8096, "step": 20433 }, { "epoch": 0.7217657369673582, "grad_norm": 1.6044542789459229, "learning_rate": 1.8964342415714493e-06, "loss": 0.7488, "step": 20434 }, { "epoch": 0.7218010587710662, "grad_norm": 1.7965681552886963, "learning_rate": 1.895985784547597e-06, "loss": 0.7387, "step": 20435 }, { "epoch": 0.7218363805747741, "grad_norm": 1.5696254968643188, "learning_rate": 1.8955373681484996e-06, "loss": 0.7362, "step": 20436 }, { "epoch": 0.721871702378482, "grad_norm": 2.414860248565674, "learning_rate": 1.895088992380023e-06, "loss": 0.8053, "step": 20437 }, { "epoch": 0.7219070241821899, "grad_norm": 1.6380256414413452, "learning_rate": 1.8946406572480392e-06, "loss": 0.7733, "step": 20438 }, { "epoch": 0.7219423459858978, "grad_norm": 1.75336492061615, "learning_rate": 1.8941923627584158e-06, "loss": 0.761, "step": 20439 }, { "epoch": 0.7219776677896057, "grad_norm": 1.7488293647766113, "learning_rate": 1.8937441089170172e-06, "loss": 0.7614, "step": 20440 }, { "epoch": 0.7220129895933136, "grad_norm": 1.7674338817596436, "learning_rate": 1.8932958957297115e-06, "loss": 0.7821, "step": 20441 }, { "epoch": 0.7220483113970215, "grad_norm": 1.8549909591674805, "learning_rate": 1.8928477232023673e-06, "loss": 0.7627, "step": 20442 }, { "epoch": 0.7220836332007294, "grad_norm": 1.875780463218689, "learning_rate": 1.892399591340846e-06, "loss": 0.7405, "step": 20443 }, { "epoch": 0.7221189550044373, "grad_norm": 1.7276813983917236, "learning_rate": 1.8919515001510153e-06, "loss": 0.7484, "step": 20444 }, { "epoch": 0.7221542768081453, "grad_norm": 2.7236568927764893, "learning_rate": 1.8915034496387403e-06, "loss": 0.7349, "step": 20445 }, { "epoch": 0.7221895986118532, "grad_norm": 2.841697931289673, "learning_rate": 1.8910554398098819e-06, "loss": 0.8036, "step": 20446 }, { "epoch": 0.722224920415561, "grad_norm": 1.6580543518066406, "learning_rate": 1.8906074706703058e-06, "loss": 0.7829, "step": 20447 }, { "epoch": 0.7222602422192689, "grad_norm": 1.6027371883392334, "learning_rate": 1.8901595422258767e-06, "loss": 0.7217, "step": 20448 }, { "epoch": 0.7222955640229768, "grad_norm": 1.7738269567489624, "learning_rate": 1.8897116544824528e-06, "loss": 0.7869, "step": 20449 }, { "epoch": 0.7223308858266847, "grad_norm": 1.9572598934173584, "learning_rate": 1.889263807445898e-06, "loss": 0.7713, "step": 20450 }, { "epoch": 0.7223662076303926, "grad_norm": 2.1007232666015625, "learning_rate": 1.8888160011220758e-06, "loss": 0.7752, "step": 20451 }, { "epoch": 0.7224015294341005, "grad_norm": 1.66596257686615, "learning_rate": 1.8883682355168426e-06, "loss": 0.7731, "step": 20452 }, { "epoch": 0.7224368512378084, "grad_norm": 1.7457588911056519, "learning_rate": 1.8879205106360614e-06, "loss": 0.7785, "step": 20453 }, { "epoch": 0.7224721730415163, "grad_norm": 1.9026089906692505, "learning_rate": 1.8874728264855934e-06, "loss": 0.7679, "step": 20454 }, { "epoch": 0.7225074948452243, "grad_norm": 1.7121493816375732, "learning_rate": 1.887025183071296e-06, "loss": 0.7849, "step": 20455 }, { "epoch": 0.7225428166489322, "grad_norm": 1.5594733953475952, "learning_rate": 1.886577580399026e-06, "loss": 0.7546, "step": 20456 }, { "epoch": 0.7225781384526401, "grad_norm": 1.7025872468948364, "learning_rate": 1.8861300184746435e-06, "loss": 0.756, "step": 20457 }, { "epoch": 0.722613460256348, "grad_norm": 1.6856184005737305, "learning_rate": 1.8856824973040073e-06, "loss": 0.7801, "step": 20458 }, { "epoch": 0.7226487820600559, "grad_norm": 1.7860924005508423, "learning_rate": 1.8852350168929717e-06, "loss": 0.7199, "step": 20459 }, { "epoch": 0.7226841038637638, "grad_norm": 1.6741142272949219, "learning_rate": 1.8847875772473945e-06, "loss": 0.8091, "step": 20460 }, { "epoch": 0.7227194256674717, "grad_norm": 1.811083197593689, "learning_rate": 1.8843401783731336e-06, "loss": 0.7781, "step": 20461 }, { "epoch": 0.7227547474711796, "grad_norm": 1.810452938079834, "learning_rate": 1.8838928202760416e-06, "loss": 0.7618, "step": 20462 }, { "epoch": 0.7227900692748875, "grad_norm": 1.769601821899414, "learning_rate": 1.8834455029619736e-06, "loss": 0.7625, "step": 20463 }, { "epoch": 0.7228253910785954, "grad_norm": 1.491448163986206, "learning_rate": 1.8829982264367875e-06, "loss": 0.7374, "step": 20464 }, { "epoch": 0.7228607128823034, "grad_norm": 1.983366847038269, "learning_rate": 1.8825509907063328e-06, "loss": 0.7671, "step": 20465 }, { "epoch": 0.7228960346860113, "grad_norm": 1.686137318611145, "learning_rate": 1.8821037957764648e-06, "loss": 0.7728, "step": 20466 }, { "epoch": 0.7229313564897192, "grad_norm": 1.8519418239593506, "learning_rate": 1.881656641653038e-06, "loss": 0.7666, "step": 20467 }, { "epoch": 0.7229666782934271, "grad_norm": 1.6370737552642822, "learning_rate": 1.8812095283419007e-06, "loss": 0.7485, "step": 20468 }, { "epoch": 0.723002000097135, "grad_norm": 1.7414286136627197, "learning_rate": 1.8807624558489074e-06, "loss": 0.7733, "step": 20469 }, { "epoch": 0.7230373219008429, "grad_norm": 1.9182246923446655, "learning_rate": 1.8803154241799082e-06, "loss": 0.7885, "step": 20470 }, { "epoch": 0.7230726437045508, "grad_norm": 1.7033908367156982, "learning_rate": 1.8798684333407562e-06, "loss": 0.8188, "step": 20471 }, { "epoch": 0.7231079655082587, "grad_norm": 1.7727080583572388, "learning_rate": 1.8794214833372998e-06, "loss": 0.7578, "step": 20472 }, { "epoch": 0.7231432873119665, "grad_norm": 1.814471960067749, "learning_rate": 1.8789745741753867e-06, "loss": 0.7725, "step": 20473 }, { "epoch": 0.7231786091156744, "grad_norm": 1.7276676893234253, "learning_rate": 1.8785277058608691e-06, "loss": 0.7418, "step": 20474 }, { "epoch": 0.7232139309193824, "grad_norm": 1.7342851161956787, "learning_rate": 1.878080878399593e-06, "loss": 0.7434, "step": 20475 }, { "epoch": 0.7232492527230903, "grad_norm": 1.6729804277420044, "learning_rate": 1.8776340917974067e-06, "loss": 0.7933, "step": 20476 }, { "epoch": 0.7232845745267982, "grad_norm": 1.6443641185760498, "learning_rate": 1.8771873460601608e-06, "loss": 0.7693, "step": 20477 }, { "epoch": 0.7233198963305061, "grad_norm": 1.658489465713501, "learning_rate": 1.876740641193698e-06, "loss": 0.7769, "step": 20478 }, { "epoch": 0.723355218134214, "grad_norm": 1.6046565771102905, "learning_rate": 1.8762939772038662e-06, "loss": 0.8037, "step": 20479 }, { "epoch": 0.7233905399379219, "grad_norm": 1.9849441051483154, "learning_rate": 1.8758473540965121e-06, "loss": 0.7726, "step": 20480 }, { "epoch": 0.7234258617416298, "grad_norm": 1.8664050102233887, "learning_rate": 1.8754007718774819e-06, "loss": 0.7731, "step": 20481 }, { "epoch": 0.7234611835453377, "grad_norm": 1.8374059200286865, "learning_rate": 1.874954230552618e-06, "loss": 0.7422, "step": 20482 }, { "epoch": 0.7234965053490456, "grad_norm": 1.56352698802948, "learning_rate": 1.8745077301277653e-06, "loss": 0.7567, "step": 20483 }, { "epoch": 0.7235318271527535, "grad_norm": 1.8264094591140747, "learning_rate": 1.874061270608769e-06, "loss": 0.7906, "step": 20484 }, { "epoch": 0.7235671489564615, "grad_norm": 1.8530012369155884, "learning_rate": 1.8736148520014703e-06, "loss": 0.768, "step": 20485 }, { "epoch": 0.7236024707601694, "grad_norm": 2.152261257171631, "learning_rate": 1.8731684743117123e-06, "loss": 0.8031, "step": 20486 }, { "epoch": 0.7236377925638773, "grad_norm": 0.8940994739532471, "learning_rate": 1.8727221375453397e-06, "loss": 0.575, "step": 20487 }, { "epoch": 0.7236731143675852, "grad_norm": 1.8529438972473145, "learning_rate": 1.8722758417081898e-06, "loss": 0.8185, "step": 20488 }, { "epoch": 0.7237084361712931, "grad_norm": 1.8539881706237793, "learning_rate": 1.871829586806106e-06, "loss": 0.782, "step": 20489 }, { "epoch": 0.723743757975001, "grad_norm": 1.7383861541748047, "learning_rate": 1.8713833728449298e-06, "loss": 0.7704, "step": 20490 }, { "epoch": 0.7237790797787089, "grad_norm": 1.960046648979187, "learning_rate": 1.8709371998305005e-06, "loss": 0.8057, "step": 20491 }, { "epoch": 0.7238144015824168, "grad_norm": 1.886181354522705, "learning_rate": 1.8704910677686545e-06, "loss": 0.801, "step": 20492 }, { "epoch": 0.7238497233861247, "grad_norm": 1.9018988609313965, "learning_rate": 1.8700449766652334e-06, "loss": 0.7219, "step": 20493 }, { "epoch": 0.7238850451898327, "grad_norm": 1.6009647846221924, "learning_rate": 1.8695989265260773e-06, "loss": 0.7831, "step": 20494 }, { "epoch": 0.7239203669935406, "grad_norm": 1.7128937244415283, "learning_rate": 1.86915291735702e-06, "loss": 0.7779, "step": 20495 }, { "epoch": 0.7239556887972485, "grad_norm": 1.5451023578643799, "learning_rate": 1.868706949163901e-06, "loss": 0.7627, "step": 20496 }, { "epoch": 0.7239910106009564, "grad_norm": 1.845935583114624, "learning_rate": 1.8682610219525581e-06, "loss": 0.8123, "step": 20497 }, { "epoch": 0.7240263324046643, "grad_norm": 1.582388997077942, "learning_rate": 1.867815135728825e-06, "loss": 0.7471, "step": 20498 }, { "epoch": 0.7240616542083721, "grad_norm": 43.56174087524414, "learning_rate": 1.8673692904985385e-06, "loss": 0.7279, "step": 20499 }, { "epoch": 0.72409697601208, "grad_norm": 2.1223573684692383, "learning_rate": 1.8669234862675357e-06, "loss": 0.7503, "step": 20500 }, { "epoch": 0.7241322978157879, "grad_norm": 1.7260596752166748, "learning_rate": 1.866477723041647e-06, "loss": 0.7931, "step": 20501 }, { "epoch": 0.7241676196194958, "grad_norm": 1.6900482177734375, "learning_rate": 1.8660320008267097e-06, "loss": 0.7279, "step": 20502 }, { "epoch": 0.7242029414232037, "grad_norm": 1.8031648397445679, "learning_rate": 1.8655863196285573e-06, "loss": 0.8215, "step": 20503 }, { "epoch": 0.7242382632269116, "grad_norm": 1.6212210655212402, "learning_rate": 1.8651406794530208e-06, "loss": 0.7523, "step": 20504 }, { "epoch": 0.7242735850306196, "grad_norm": 1.6862958669662476, "learning_rate": 1.8646950803059338e-06, "loss": 0.7217, "step": 20505 }, { "epoch": 0.7243089068343275, "grad_norm": 1.6111918687820435, "learning_rate": 1.86424952219313e-06, "loss": 0.8008, "step": 20506 }, { "epoch": 0.7243442286380354, "grad_norm": 2.437819004058838, "learning_rate": 1.863804005120437e-06, "loss": 0.7519, "step": 20507 }, { "epoch": 0.7243795504417433, "grad_norm": 1.7948474884033203, "learning_rate": 1.8633585290936878e-06, "loss": 0.7795, "step": 20508 }, { "epoch": 0.7244148722454512, "grad_norm": 1.5505454540252686, "learning_rate": 1.862913094118714e-06, "loss": 0.7845, "step": 20509 }, { "epoch": 0.7244501940491591, "grad_norm": 1.8562660217285156, "learning_rate": 1.862467700201344e-06, "loss": 0.7737, "step": 20510 }, { "epoch": 0.724485515852867, "grad_norm": 1.9328348636627197, "learning_rate": 1.8620223473474053e-06, "loss": 0.7427, "step": 20511 }, { "epoch": 0.7245208376565749, "grad_norm": 1.526987910270691, "learning_rate": 1.8615770355627284e-06, "loss": 0.7676, "step": 20512 }, { "epoch": 0.7245561594602828, "grad_norm": 1.7410629987716675, "learning_rate": 1.861131764853143e-06, "loss": 0.7336, "step": 20513 }, { "epoch": 0.7245914812639908, "grad_norm": 1.743709921836853, "learning_rate": 1.860686535224473e-06, "loss": 0.7793, "step": 20514 }, { "epoch": 0.7246268030676987, "grad_norm": 1.543800950050354, "learning_rate": 1.860241346682548e-06, "loss": 0.7849, "step": 20515 }, { "epoch": 0.7246621248714066, "grad_norm": 1.7581214904785156, "learning_rate": 1.8597961992331954e-06, "loss": 0.7909, "step": 20516 }, { "epoch": 0.7246974466751145, "grad_norm": 1.7680201530456543, "learning_rate": 1.8593510928822384e-06, "loss": 0.759, "step": 20517 }, { "epoch": 0.7247327684788224, "grad_norm": 1.6474783420562744, "learning_rate": 1.8589060276355037e-06, "loss": 0.7477, "step": 20518 }, { "epoch": 0.7247680902825303, "grad_norm": 1.76932692527771, "learning_rate": 1.8584610034988177e-06, "loss": 0.7677, "step": 20519 }, { "epoch": 0.7248034120862382, "grad_norm": 1.696406364440918, "learning_rate": 1.8580160204780024e-06, "loss": 0.7767, "step": 20520 }, { "epoch": 0.7248387338899461, "grad_norm": 0.9265272617340088, "learning_rate": 1.8575710785788826e-06, "loss": 0.5632, "step": 20521 }, { "epoch": 0.724874055693654, "grad_norm": 1.6653707027435303, "learning_rate": 1.8571261778072842e-06, "loss": 0.7332, "step": 20522 }, { "epoch": 0.724909377497362, "grad_norm": 1.832081913948059, "learning_rate": 1.8566813181690251e-06, "loss": 0.7559, "step": 20523 }, { "epoch": 0.7249446993010699, "grad_norm": 1.719775676727295, "learning_rate": 1.85623649966993e-06, "loss": 0.7832, "step": 20524 }, { "epoch": 0.7249800211047777, "grad_norm": 1.5387998819351196, "learning_rate": 1.855791722315823e-06, "loss": 0.7517, "step": 20525 }, { "epoch": 0.7250153429084856, "grad_norm": 1.6952601671218872, "learning_rate": 1.8553469861125205e-06, "loss": 0.8012, "step": 20526 }, { "epoch": 0.7250506647121935, "grad_norm": 1.7401587963104248, "learning_rate": 1.8549022910658477e-06, "loss": 0.7495, "step": 20527 }, { "epoch": 0.7250859865159014, "grad_norm": 1.6992870569229126, "learning_rate": 1.8544576371816203e-06, "loss": 0.7753, "step": 20528 }, { "epoch": 0.7251213083196093, "grad_norm": 1.783530592918396, "learning_rate": 1.8540130244656618e-06, "loss": 0.7434, "step": 20529 }, { "epoch": 0.7251566301233172, "grad_norm": 1.9831805229187012, "learning_rate": 1.853568452923788e-06, "loss": 0.7715, "step": 20530 }, { "epoch": 0.7251919519270251, "grad_norm": 1.9679386615753174, "learning_rate": 1.8531239225618192e-06, "loss": 0.7485, "step": 20531 }, { "epoch": 0.725227273730733, "grad_norm": 1.6723159551620483, "learning_rate": 1.8526794333855742e-06, "loss": 0.7508, "step": 20532 }, { "epoch": 0.725262595534441, "grad_norm": 1.6644306182861328, "learning_rate": 1.8522349854008676e-06, "loss": 0.7702, "step": 20533 }, { "epoch": 0.7252979173381489, "grad_norm": 1.8743972778320312, "learning_rate": 1.8517905786135177e-06, "loss": 0.8025, "step": 20534 }, { "epoch": 0.7253332391418568, "grad_norm": 1.9476326704025269, "learning_rate": 1.8513462130293425e-06, "loss": 0.7605, "step": 20535 }, { "epoch": 0.7253685609455647, "grad_norm": 1.7574049234390259, "learning_rate": 1.850901888654154e-06, "loss": 0.7609, "step": 20536 }, { "epoch": 0.7254038827492726, "grad_norm": 1.7015219926834106, "learning_rate": 1.8504576054937706e-06, "loss": 0.7642, "step": 20537 }, { "epoch": 0.7254392045529805, "grad_norm": 1.8371632099151611, "learning_rate": 1.8500133635540057e-06, "loss": 0.8224, "step": 20538 }, { "epoch": 0.7254745263566884, "grad_norm": 2.5086283683776855, "learning_rate": 1.8495691628406753e-06, "loss": 0.7725, "step": 20539 }, { "epoch": 0.7255098481603963, "grad_norm": 1.8324086666107178, "learning_rate": 1.8491250033595898e-06, "loss": 0.7328, "step": 20540 }, { "epoch": 0.7255451699641042, "grad_norm": 1.6821365356445312, "learning_rate": 1.8486808851165638e-06, "loss": 0.7133, "step": 20541 }, { "epoch": 0.7255804917678121, "grad_norm": 1.658793568611145, "learning_rate": 1.8482368081174124e-06, "loss": 0.7478, "step": 20542 }, { "epoch": 0.72561581357152, "grad_norm": 1.665362000465393, "learning_rate": 1.8477927723679428e-06, "loss": 0.7282, "step": 20543 }, { "epoch": 0.725651135375228, "grad_norm": 1.8492704629898071, "learning_rate": 1.8473487778739691e-06, "loss": 0.7947, "step": 20544 }, { "epoch": 0.7256864571789359, "grad_norm": 1.5772331953048706, "learning_rate": 1.8469048246413034e-06, "loss": 0.7517, "step": 20545 }, { "epoch": 0.7257217789826438, "grad_norm": 1.8252811431884766, "learning_rate": 1.846460912675755e-06, "loss": 0.7508, "step": 20546 }, { "epoch": 0.7257571007863517, "grad_norm": 1.6986804008483887, "learning_rate": 1.8460170419831313e-06, "loss": 0.7509, "step": 20547 }, { "epoch": 0.7257924225900596, "grad_norm": 1.8576213121414185, "learning_rate": 1.8455732125692438e-06, "loss": 0.7962, "step": 20548 }, { "epoch": 0.7258277443937675, "grad_norm": 1.825336217880249, "learning_rate": 1.845129424439903e-06, "loss": 0.8017, "step": 20549 }, { "epoch": 0.7258630661974754, "grad_norm": 3.481186628341675, "learning_rate": 1.8446856776009137e-06, "loss": 0.7641, "step": 20550 }, { "epoch": 0.7258983880011832, "grad_norm": 1.9734159708023071, "learning_rate": 1.8442419720580845e-06, "loss": 0.7761, "step": 20551 }, { "epoch": 0.7259337098048911, "grad_norm": 1.9097130298614502, "learning_rate": 1.8437983078172256e-06, "loss": 0.74, "step": 20552 }, { "epoch": 0.725969031608599, "grad_norm": 1.000106692314148, "learning_rate": 1.8433546848841389e-06, "loss": 0.5806, "step": 20553 }, { "epoch": 0.726004353412307, "grad_norm": 1.6456496715545654, "learning_rate": 1.842911103264633e-06, "loss": 0.761, "step": 20554 }, { "epoch": 0.7260396752160149, "grad_norm": 1.896884799003601, "learning_rate": 1.8424675629645145e-06, "loss": 0.8186, "step": 20555 }, { "epoch": 0.7260749970197228, "grad_norm": 1.6862937211990356, "learning_rate": 1.8420240639895853e-06, "loss": 0.7669, "step": 20556 }, { "epoch": 0.7261103188234307, "grad_norm": 1.6892502307891846, "learning_rate": 1.8415806063456522e-06, "loss": 0.7521, "step": 20557 }, { "epoch": 0.7261456406271386, "grad_norm": 2.0537242889404297, "learning_rate": 1.8411371900385195e-06, "loss": 0.7725, "step": 20558 }, { "epoch": 0.7261809624308465, "grad_norm": 1.5697299242019653, "learning_rate": 1.8406938150739878e-06, "loss": 0.7647, "step": 20559 }, { "epoch": 0.7262162842345544, "grad_norm": 1.5997531414031982, "learning_rate": 1.8402504814578614e-06, "loss": 0.7882, "step": 20560 }, { "epoch": 0.7262516060382623, "grad_norm": 1.7704265117645264, "learning_rate": 1.8398071891959446e-06, "loss": 0.7785, "step": 20561 }, { "epoch": 0.7262869278419702, "grad_norm": 1.9061286449432373, "learning_rate": 1.8393639382940354e-06, "loss": 0.7142, "step": 20562 }, { "epoch": 0.7263222496456782, "grad_norm": 1.923901081085205, "learning_rate": 1.8389207287579375e-06, "loss": 0.7816, "step": 20563 }, { "epoch": 0.7263575714493861, "grad_norm": 1.7906250953674316, "learning_rate": 1.8384775605934518e-06, "loss": 0.7799, "step": 20564 }, { "epoch": 0.726392893253094, "grad_norm": 2.3420045375823975, "learning_rate": 1.8380344338063778e-06, "loss": 0.7753, "step": 20565 }, { "epoch": 0.7264282150568019, "grad_norm": 1.9986830949783325, "learning_rate": 1.8375913484025132e-06, "loss": 0.7435, "step": 20566 }, { "epoch": 0.7264635368605098, "grad_norm": 1.8811134099960327, "learning_rate": 1.837148304387658e-06, "loss": 0.7985, "step": 20567 }, { "epoch": 0.7264988586642177, "grad_norm": 1.800720453262329, "learning_rate": 1.836705301767614e-06, "loss": 0.7656, "step": 20568 }, { "epoch": 0.7265341804679256, "grad_norm": 1.78974187374115, "learning_rate": 1.836262340548174e-06, "loss": 0.7325, "step": 20569 }, { "epoch": 0.7265695022716335, "grad_norm": 1.4852418899536133, "learning_rate": 1.8358194207351376e-06, "loss": 0.7934, "step": 20570 }, { "epoch": 0.7266048240753414, "grad_norm": 1.6257902383804321, "learning_rate": 1.8353765423343034e-06, "loss": 0.7534, "step": 20571 }, { "epoch": 0.7266401458790493, "grad_norm": 2.0249993801116943, "learning_rate": 1.8349337053514649e-06, "loss": 0.7491, "step": 20572 }, { "epoch": 0.7266754676827573, "grad_norm": 1.6402733325958252, "learning_rate": 1.8344909097924191e-06, "loss": 0.7351, "step": 20573 }, { "epoch": 0.7267107894864652, "grad_norm": 1.6346467733383179, "learning_rate": 1.834048155662963e-06, "loss": 0.7358, "step": 20574 }, { "epoch": 0.7267461112901731, "grad_norm": 2.3913087844848633, "learning_rate": 1.833605442968887e-06, "loss": 0.7813, "step": 20575 }, { "epoch": 0.726781433093881, "grad_norm": 1.797597885131836, "learning_rate": 1.833162771715989e-06, "loss": 0.7595, "step": 20576 }, { "epoch": 0.7268167548975888, "grad_norm": 1.673182487487793, "learning_rate": 1.832720141910062e-06, "loss": 0.739, "step": 20577 }, { "epoch": 0.7268520767012967, "grad_norm": 2.037754535675049, "learning_rate": 1.832277553556897e-06, "loss": 0.7286, "step": 20578 }, { "epoch": 0.7268873985050046, "grad_norm": 1.6541306972503662, "learning_rate": 1.831835006662288e-06, "loss": 0.7585, "step": 20579 }, { "epoch": 0.7269227203087125, "grad_norm": 1.7665393352508545, "learning_rate": 1.8313925012320288e-06, "loss": 0.7488, "step": 20580 }, { "epoch": 0.7269580421124204, "grad_norm": 2.491832733154297, "learning_rate": 1.830950037271907e-06, "loss": 0.7865, "step": 20581 }, { "epoch": 0.7269933639161283, "grad_norm": 1.5438331365585327, "learning_rate": 1.8305076147877148e-06, "loss": 0.739, "step": 20582 }, { "epoch": 0.7270286857198363, "grad_norm": 1.722793698310852, "learning_rate": 1.830065233785246e-06, "loss": 0.7799, "step": 20583 }, { "epoch": 0.7270640075235442, "grad_norm": 1.7112629413604736, "learning_rate": 1.8296228942702866e-06, "loss": 0.7695, "step": 20584 }, { "epoch": 0.7270993293272521, "grad_norm": 1.785886526107788, "learning_rate": 1.8291805962486253e-06, "loss": 0.7571, "step": 20585 }, { "epoch": 0.72713465113096, "grad_norm": 1.5455894470214844, "learning_rate": 1.8287383397260527e-06, "loss": 0.7589, "step": 20586 }, { "epoch": 0.7271699729346679, "grad_norm": 1.8704088926315308, "learning_rate": 1.828296124708358e-06, "loss": 0.743, "step": 20587 }, { "epoch": 0.7272052947383758, "grad_norm": 1.7025359869003296, "learning_rate": 1.8278539512013254e-06, "loss": 0.7573, "step": 20588 }, { "epoch": 0.7272406165420837, "grad_norm": 1.60955011844635, "learning_rate": 1.8274118192107448e-06, "loss": 0.7836, "step": 20589 }, { "epoch": 0.7272759383457916, "grad_norm": 1.6917790174484253, "learning_rate": 1.8269697287424031e-06, "loss": 0.7377, "step": 20590 }, { "epoch": 0.7273112601494995, "grad_norm": 1.5881282091140747, "learning_rate": 1.8265276798020832e-06, "loss": 0.7686, "step": 20591 }, { "epoch": 0.7273465819532074, "grad_norm": 2.214759349822998, "learning_rate": 1.8260856723955727e-06, "loss": 0.7615, "step": 20592 }, { "epoch": 0.7273819037569154, "grad_norm": 1.6661735773086548, "learning_rate": 1.8256437065286581e-06, "loss": 0.7787, "step": 20593 }, { "epoch": 0.7274172255606233, "grad_norm": 1.8425558805465698, "learning_rate": 1.8252017822071205e-06, "loss": 0.7541, "step": 20594 }, { "epoch": 0.7274525473643312, "grad_norm": 1.85452139377594, "learning_rate": 1.8247598994367444e-06, "loss": 0.8104, "step": 20595 }, { "epoch": 0.7274878691680391, "grad_norm": 1.7502225637435913, "learning_rate": 1.8243180582233144e-06, "loss": 0.7733, "step": 20596 }, { "epoch": 0.727523190971747, "grad_norm": 1.7469813823699951, "learning_rate": 1.8238762585726145e-06, "loss": 0.7744, "step": 20597 }, { "epoch": 0.7275585127754549, "grad_norm": 1.5912944078445435, "learning_rate": 1.823434500490423e-06, "loss": 0.7504, "step": 20598 }, { "epoch": 0.7275938345791628, "grad_norm": 1.5822023153305054, "learning_rate": 1.822992783982524e-06, "loss": 0.7355, "step": 20599 }, { "epoch": 0.7276291563828707, "grad_norm": 1.6419451236724854, "learning_rate": 1.8225511090546998e-06, "loss": 0.7534, "step": 20600 }, { "epoch": 0.7276644781865786, "grad_norm": 1.553481101989746, "learning_rate": 1.8221094757127294e-06, "loss": 0.7477, "step": 20601 }, { "epoch": 0.7276997999902866, "grad_norm": 1.7431275844573975, "learning_rate": 1.8216678839623907e-06, "loss": 0.7844, "step": 20602 }, { "epoch": 0.7277351217939944, "grad_norm": 1.6560722589492798, "learning_rate": 1.8212263338094676e-06, "loss": 0.7733, "step": 20603 }, { "epoch": 0.7277704435977023, "grad_norm": 1.8244142532348633, "learning_rate": 1.820784825259735e-06, "loss": 0.7505, "step": 20604 }, { "epoch": 0.7278057654014102, "grad_norm": 2.1138875484466553, "learning_rate": 1.8203433583189722e-06, "loss": 0.7842, "step": 20605 }, { "epoch": 0.7278410872051181, "grad_norm": 2.657132148742676, "learning_rate": 1.8199019329929585e-06, "loss": 0.7513, "step": 20606 }, { "epoch": 0.727876409008826, "grad_norm": 1.6971491575241089, "learning_rate": 1.8194605492874722e-06, "loss": 0.7615, "step": 20607 }, { "epoch": 0.7279117308125339, "grad_norm": 1.9509786367416382, "learning_rate": 1.8190192072082862e-06, "loss": 0.7955, "step": 20608 }, { "epoch": 0.7279470526162418, "grad_norm": 2.1740145683288574, "learning_rate": 1.8185779067611791e-06, "loss": 0.8027, "step": 20609 }, { "epoch": 0.7279823744199497, "grad_norm": 2.439375638961792, "learning_rate": 1.818136647951928e-06, "loss": 0.7233, "step": 20610 }, { "epoch": 0.7280176962236576, "grad_norm": 1.6754589080810547, "learning_rate": 1.8176954307863048e-06, "loss": 0.7564, "step": 20611 }, { "epoch": 0.7280530180273656, "grad_norm": 1.776710033416748, "learning_rate": 1.8172542552700857e-06, "loss": 0.785, "step": 20612 }, { "epoch": 0.7280883398310735, "grad_norm": 1.965785264968872, "learning_rate": 1.8168131214090462e-06, "loss": 0.7656, "step": 20613 }, { "epoch": 0.7281236616347814, "grad_norm": 7.214874744415283, "learning_rate": 1.816372029208956e-06, "loss": 0.7789, "step": 20614 }, { "epoch": 0.7281589834384893, "grad_norm": 1.6703650951385498, "learning_rate": 1.8159309786755907e-06, "loss": 0.7621, "step": 20615 }, { "epoch": 0.7281943052421972, "grad_norm": 2.187638998031616, "learning_rate": 1.8154899698147239e-06, "loss": 0.7403, "step": 20616 }, { "epoch": 0.7282296270459051, "grad_norm": 1.8985521793365479, "learning_rate": 1.8150490026321238e-06, "loss": 0.8153, "step": 20617 }, { "epoch": 0.728264948849613, "grad_norm": 1.7402093410491943, "learning_rate": 1.814608077133564e-06, "loss": 0.7717, "step": 20618 }, { "epoch": 0.7283002706533209, "grad_norm": 1.6963053941726685, "learning_rate": 1.8141671933248162e-06, "loss": 0.7717, "step": 20619 }, { "epoch": 0.7283355924570288, "grad_norm": 1.6651216745376587, "learning_rate": 1.81372635121165e-06, "loss": 0.7651, "step": 20620 }, { "epoch": 0.7283709142607367, "grad_norm": 1.9277372360229492, "learning_rate": 1.813285550799832e-06, "loss": 0.7596, "step": 20621 }, { "epoch": 0.7284062360644447, "grad_norm": 1.6992744207382202, "learning_rate": 1.8128447920951335e-06, "loss": 0.7719, "step": 20622 }, { "epoch": 0.7284415578681526, "grad_norm": 1.7853301763534546, "learning_rate": 1.8124040751033256e-06, "loss": 0.782, "step": 20623 }, { "epoch": 0.7284768796718605, "grad_norm": 1.6895920038223267, "learning_rate": 1.8119633998301716e-06, "loss": 0.7306, "step": 20624 }, { "epoch": 0.7285122014755684, "grad_norm": 1.7525123357772827, "learning_rate": 1.811522766281442e-06, "loss": 0.7928, "step": 20625 }, { "epoch": 0.7285475232792763, "grad_norm": 1.7840927839279175, "learning_rate": 1.8110821744629042e-06, "loss": 0.8015, "step": 20626 }, { "epoch": 0.7285828450829842, "grad_norm": 1.9392004013061523, "learning_rate": 1.8106416243803221e-06, "loss": 0.7729, "step": 20627 }, { "epoch": 0.7286181668866921, "grad_norm": 1.8254659175872803, "learning_rate": 1.8102011160394627e-06, "loss": 0.7551, "step": 20628 }, { "epoch": 0.7286534886904, "grad_norm": 1.6431174278259277, "learning_rate": 1.8097606494460934e-06, "loss": 0.7532, "step": 20629 }, { "epoch": 0.7286888104941078, "grad_norm": 1.742905855178833, "learning_rate": 1.8093202246059755e-06, "loss": 0.7586, "step": 20630 }, { "epoch": 0.7287241322978157, "grad_norm": 1.6174710988998413, "learning_rate": 1.8088798415248743e-06, "loss": 0.7657, "step": 20631 }, { "epoch": 0.7287594541015237, "grad_norm": 2.025973320007324, "learning_rate": 1.8084395002085559e-06, "loss": 0.7954, "step": 20632 }, { "epoch": 0.7287947759052316, "grad_norm": 1.887343406677246, "learning_rate": 1.8079992006627795e-06, "loss": 0.7648, "step": 20633 }, { "epoch": 0.7288300977089395, "grad_norm": 1.5596191883087158, "learning_rate": 1.8075589428933093e-06, "loss": 0.7784, "step": 20634 }, { "epoch": 0.7288654195126474, "grad_norm": 1.682928442955017, "learning_rate": 1.8071187269059093e-06, "loss": 0.7743, "step": 20635 }, { "epoch": 0.7289007413163553, "grad_norm": 1.908973217010498, "learning_rate": 1.806678552706338e-06, "loss": 0.766, "step": 20636 }, { "epoch": 0.7289360631200632, "grad_norm": 1.5505969524383545, "learning_rate": 1.8062384203003568e-06, "loss": 0.7237, "step": 20637 }, { "epoch": 0.7289713849237711, "grad_norm": 1.695914387702942, "learning_rate": 1.8057983296937288e-06, "loss": 0.7844, "step": 20638 }, { "epoch": 0.729006706727479, "grad_norm": 1.6559691429138184, "learning_rate": 1.8053582808922122e-06, "loss": 0.762, "step": 20639 }, { "epoch": 0.7290420285311869, "grad_norm": 1.5448267459869385, "learning_rate": 1.8049182739015636e-06, "loss": 0.7449, "step": 20640 }, { "epoch": 0.7290773503348948, "grad_norm": 1.7999342679977417, "learning_rate": 1.804478308727544e-06, "loss": 0.7685, "step": 20641 }, { "epoch": 0.7291126721386028, "grad_norm": 2.1630191802978516, "learning_rate": 1.804038385375914e-06, "loss": 0.7615, "step": 20642 }, { "epoch": 0.7291479939423107, "grad_norm": 1.7595105171203613, "learning_rate": 1.8035985038524268e-06, "loss": 0.781, "step": 20643 }, { "epoch": 0.7291833157460186, "grad_norm": 2.0681509971618652, "learning_rate": 1.8031586641628412e-06, "loss": 0.7348, "step": 20644 }, { "epoch": 0.7292186375497265, "grad_norm": 1.8680169582366943, "learning_rate": 1.8027188663129164e-06, "loss": 0.7559, "step": 20645 }, { "epoch": 0.7292539593534344, "grad_norm": 0.8298957347869873, "learning_rate": 1.8022791103084035e-06, "loss": 0.5741, "step": 20646 }, { "epoch": 0.7292892811571423, "grad_norm": 1.6888030767440796, "learning_rate": 1.8018393961550613e-06, "loss": 0.7707, "step": 20647 }, { "epoch": 0.7293246029608502, "grad_norm": 1.6523226499557495, "learning_rate": 1.801399723858645e-06, "loss": 0.7849, "step": 20648 }, { "epoch": 0.7293599247645581, "grad_norm": 1.6807516813278198, "learning_rate": 1.8009600934249066e-06, "loss": 0.7814, "step": 20649 }, { "epoch": 0.729395246568266, "grad_norm": 1.7523608207702637, "learning_rate": 1.8005205048596008e-06, "loss": 0.7776, "step": 20650 }, { "epoch": 0.729430568371974, "grad_norm": 1.6289119720458984, "learning_rate": 1.800080958168483e-06, "loss": 0.7612, "step": 20651 }, { "epoch": 0.7294658901756819, "grad_norm": 1.7672221660614014, "learning_rate": 1.7996414533573027e-06, "loss": 0.7393, "step": 20652 }, { "epoch": 0.7295012119793898, "grad_norm": 1.9255784749984741, "learning_rate": 1.799201990431813e-06, "loss": 0.7427, "step": 20653 }, { "epoch": 0.7295365337830977, "grad_norm": 1.6368308067321777, "learning_rate": 1.7987625693977678e-06, "loss": 0.7706, "step": 20654 }, { "epoch": 0.7295718555868056, "grad_norm": 1.8030766248703003, "learning_rate": 1.798323190260915e-06, "loss": 0.7811, "step": 20655 }, { "epoch": 0.7296071773905134, "grad_norm": 1.5551226139068604, "learning_rate": 1.797883853027006e-06, "loss": 0.749, "step": 20656 }, { "epoch": 0.7296424991942213, "grad_norm": 1.764501929283142, "learning_rate": 1.797444557701793e-06, "loss": 0.7782, "step": 20657 }, { "epoch": 0.7296778209979292, "grad_norm": 1.829068899154663, "learning_rate": 1.7970053042910235e-06, "loss": 0.7683, "step": 20658 }, { "epoch": 0.7297131428016371, "grad_norm": 1.6712859869003296, "learning_rate": 1.7965660928004452e-06, "loss": 0.7426, "step": 20659 }, { "epoch": 0.729748464605345, "grad_norm": 1.5927594900131226, "learning_rate": 1.7961269232358076e-06, "loss": 0.7724, "step": 20660 }, { "epoch": 0.729783786409053, "grad_norm": 1.6705329418182373, "learning_rate": 1.7956877956028595e-06, "loss": 0.7665, "step": 20661 }, { "epoch": 0.7298191082127609, "grad_norm": 1.8885778188705444, "learning_rate": 1.7952487099073463e-06, "loss": 0.75, "step": 20662 }, { "epoch": 0.7298544300164688, "grad_norm": 1.7585468292236328, "learning_rate": 1.7948096661550152e-06, "loss": 0.7958, "step": 20663 }, { "epoch": 0.7298897518201767, "grad_norm": 1.5294370651245117, "learning_rate": 1.7943706643516146e-06, "loss": 0.7526, "step": 20664 }, { "epoch": 0.7299250736238846, "grad_norm": 1.784061074256897, "learning_rate": 1.7939317045028865e-06, "loss": 0.7888, "step": 20665 }, { "epoch": 0.7299603954275925, "grad_norm": 2.0585358142852783, "learning_rate": 1.7934927866145774e-06, "loss": 0.7538, "step": 20666 }, { "epoch": 0.7299957172313004, "grad_norm": 1.5520868301391602, "learning_rate": 1.7930539106924322e-06, "loss": 0.765, "step": 20667 }, { "epoch": 0.7300310390350083, "grad_norm": 1.9956722259521484, "learning_rate": 1.7926150767421957e-06, "loss": 0.7646, "step": 20668 }, { "epoch": 0.7300663608387162, "grad_norm": 1.5685014724731445, "learning_rate": 1.792176284769609e-06, "loss": 0.7505, "step": 20669 }, { "epoch": 0.7301016826424241, "grad_norm": 5.184651851654053, "learning_rate": 1.791737534780416e-06, "loss": 0.7507, "step": 20670 }, { "epoch": 0.730137004446132, "grad_norm": 2.2317073345184326, "learning_rate": 1.7912988267803605e-06, "loss": 0.7854, "step": 20671 }, { "epoch": 0.73017232624984, "grad_norm": 2.2121422290802, "learning_rate": 1.7908601607751812e-06, "loss": 0.7898, "step": 20672 }, { "epoch": 0.7302076480535479, "grad_norm": 1.7070246934890747, "learning_rate": 1.7904215367706207e-06, "loss": 0.7726, "step": 20673 }, { "epoch": 0.7302429698572558, "grad_norm": 1.4923882484436035, "learning_rate": 1.7899829547724219e-06, "loss": 0.717, "step": 20674 }, { "epoch": 0.7302782916609637, "grad_norm": 1.636528491973877, "learning_rate": 1.7895444147863229e-06, "loss": 0.7873, "step": 20675 }, { "epoch": 0.7303136134646716, "grad_norm": 2.35831618309021, "learning_rate": 1.7891059168180608e-06, "loss": 0.793, "step": 20676 }, { "epoch": 0.7303489352683795, "grad_norm": 1.6838001012802124, "learning_rate": 1.7886674608733772e-06, "loss": 0.7736, "step": 20677 }, { "epoch": 0.7303842570720874, "grad_norm": 1.8207346200942993, "learning_rate": 1.7882290469580116e-06, "loss": 0.7517, "step": 20678 }, { "epoch": 0.7304195788757953, "grad_norm": 1.8071725368499756, "learning_rate": 1.7877906750776992e-06, "loss": 0.7678, "step": 20679 }, { "epoch": 0.7304549006795032, "grad_norm": 2.0342414379119873, "learning_rate": 1.7873523452381785e-06, "loss": 0.7801, "step": 20680 }, { "epoch": 0.7304902224832112, "grad_norm": 2.03639554977417, "learning_rate": 1.7869140574451883e-06, "loss": 0.7806, "step": 20681 }, { "epoch": 0.730525544286919, "grad_norm": 1.628766655921936, "learning_rate": 1.7864758117044606e-06, "loss": 0.7689, "step": 20682 }, { "epoch": 0.7305608660906269, "grad_norm": 1.8388479948043823, "learning_rate": 1.786037608021734e-06, "loss": 0.8035, "step": 20683 }, { "epoch": 0.7305961878943348, "grad_norm": 1.8231152296066284, "learning_rate": 1.7855994464027443e-06, "loss": 0.7816, "step": 20684 }, { "epoch": 0.7306315096980427, "grad_norm": 1.8588250875473022, "learning_rate": 1.7851613268532237e-06, "loss": 0.7755, "step": 20685 }, { "epoch": 0.7306668315017506, "grad_norm": 1.7413711547851562, "learning_rate": 1.7847232493789068e-06, "loss": 0.7749, "step": 20686 }, { "epoch": 0.7307021533054585, "grad_norm": 1.825791358947754, "learning_rate": 1.7842852139855292e-06, "loss": 0.8147, "step": 20687 }, { "epoch": 0.7307374751091664, "grad_norm": 1.6452512741088867, "learning_rate": 1.783847220678821e-06, "loss": 0.7616, "step": 20688 }, { "epoch": 0.7307727969128743, "grad_norm": 1.6165236234664917, "learning_rate": 1.7834092694645156e-06, "loss": 0.7665, "step": 20689 }, { "epoch": 0.7308081187165822, "grad_norm": 1.567253828048706, "learning_rate": 1.7829713603483467e-06, "loss": 0.7842, "step": 20690 }, { "epoch": 0.7308434405202902, "grad_norm": 1.7148138284683228, "learning_rate": 1.7825334933360422e-06, "loss": 0.7649, "step": 20691 }, { "epoch": 0.7308787623239981, "grad_norm": 1.827944278717041, "learning_rate": 1.7820956684333347e-06, "loss": 0.7609, "step": 20692 }, { "epoch": 0.730914084127706, "grad_norm": 1.7404941320419312, "learning_rate": 1.7816578856459554e-06, "loss": 0.7967, "step": 20693 }, { "epoch": 0.7309494059314139, "grad_norm": 1.8606233596801758, "learning_rate": 1.781220144979633e-06, "loss": 0.7767, "step": 20694 }, { "epoch": 0.7309847277351218, "grad_norm": 1.6010010242462158, "learning_rate": 1.7807824464400946e-06, "loss": 0.7573, "step": 20695 }, { "epoch": 0.7310200495388297, "grad_norm": 3.14672589302063, "learning_rate": 1.7803447900330707e-06, "loss": 0.7681, "step": 20696 }, { "epoch": 0.7310553713425376, "grad_norm": 2.4184138774871826, "learning_rate": 1.7799071757642899e-06, "loss": 0.7385, "step": 20697 }, { "epoch": 0.7310906931462455, "grad_norm": 1.6597024202346802, "learning_rate": 1.7794696036394777e-06, "loss": 0.752, "step": 20698 }, { "epoch": 0.7311260149499534, "grad_norm": 1.7653530836105347, "learning_rate": 1.7790320736643618e-06, "loss": 0.7647, "step": 20699 }, { "epoch": 0.7311613367536614, "grad_norm": 1.9220023155212402, "learning_rate": 1.77859458584467e-06, "loss": 0.7606, "step": 20700 }, { "epoch": 0.7311966585573693, "grad_norm": 2.0478458404541016, "learning_rate": 1.7781571401861253e-06, "loss": 0.7824, "step": 20701 }, { "epoch": 0.7312319803610772, "grad_norm": 1.6928857564926147, "learning_rate": 1.7777197366944543e-06, "loss": 0.7899, "step": 20702 }, { "epoch": 0.7312673021647851, "grad_norm": 1.5840117931365967, "learning_rate": 1.7772823753753832e-06, "loss": 0.7654, "step": 20703 }, { "epoch": 0.731302623968493, "grad_norm": 1.8867870569229126, "learning_rate": 1.7768450562346328e-06, "loss": 0.763, "step": 20704 }, { "epoch": 0.7313379457722009, "grad_norm": 1.5553785562515259, "learning_rate": 1.7764077792779289e-06, "loss": 0.7908, "step": 20705 }, { "epoch": 0.7313732675759088, "grad_norm": 1.679702877998352, "learning_rate": 1.7759705445109954e-06, "loss": 0.7624, "step": 20706 }, { "epoch": 0.7314085893796167, "grad_norm": 1.635582685470581, "learning_rate": 1.7755333519395513e-06, "loss": 0.7409, "step": 20707 }, { "epoch": 0.7314439111833245, "grad_norm": 1.8099288940429688, "learning_rate": 1.7750962015693207e-06, "loss": 0.7857, "step": 20708 }, { "epoch": 0.7314792329870324, "grad_norm": 0.8200310468673706, "learning_rate": 1.774659093406027e-06, "loss": 0.5574, "step": 20709 }, { "epoch": 0.7315145547907403, "grad_norm": 1.7647043466567993, "learning_rate": 1.7742220274553868e-06, "loss": 0.7432, "step": 20710 }, { "epoch": 0.7315498765944483, "grad_norm": 1.9692661762237549, "learning_rate": 1.7737850037231224e-06, "loss": 0.779, "step": 20711 }, { "epoch": 0.7315851983981562, "grad_norm": 1.876776099205017, "learning_rate": 1.773348022214955e-06, "loss": 0.7904, "step": 20712 }, { "epoch": 0.7316205202018641, "grad_norm": 1.7928699254989624, "learning_rate": 1.772911082936602e-06, "loss": 0.7949, "step": 20713 }, { "epoch": 0.731655842005572, "grad_norm": 1.7722721099853516, "learning_rate": 1.7724741858937805e-06, "loss": 0.7796, "step": 20714 }, { "epoch": 0.7316911638092799, "grad_norm": 1.6986885070800781, "learning_rate": 1.7720373310922107e-06, "loss": 0.7186, "step": 20715 }, { "epoch": 0.7317264856129878, "grad_norm": 1.7468063831329346, "learning_rate": 1.7716005185376106e-06, "loss": 0.7655, "step": 20716 }, { "epoch": 0.7317618074166957, "grad_norm": 1.8223329782485962, "learning_rate": 1.7711637482356942e-06, "loss": 0.7999, "step": 20717 }, { "epoch": 0.7317971292204036, "grad_norm": 1.6789647340774536, "learning_rate": 1.77072702019218e-06, "loss": 0.7682, "step": 20718 }, { "epoch": 0.7318324510241115, "grad_norm": 1.7182577848434448, "learning_rate": 1.7702903344127853e-06, "loss": 0.7489, "step": 20719 }, { "epoch": 0.7318677728278195, "grad_norm": 2.089862823486328, "learning_rate": 1.769853690903222e-06, "loss": 0.7806, "step": 20720 }, { "epoch": 0.7319030946315274, "grad_norm": 1.6543115377426147, "learning_rate": 1.7694170896692058e-06, "loss": 0.7508, "step": 20721 }, { "epoch": 0.7319384164352353, "grad_norm": 1.7729628086090088, "learning_rate": 1.7689805307164537e-06, "loss": 0.7805, "step": 20722 }, { "epoch": 0.7319737382389432, "grad_norm": 1.6629700660705566, "learning_rate": 1.7685440140506755e-06, "loss": 0.779, "step": 20723 }, { "epoch": 0.7320090600426511, "grad_norm": 1.6616182327270508, "learning_rate": 1.7681075396775855e-06, "loss": 0.8201, "step": 20724 }, { "epoch": 0.732044381846359, "grad_norm": 1.764885425567627, "learning_rate": 1.7676711076028964e-06, "loss": 0.7706, "step": 20725 }, { "epoch": 0.7320797036500669, "grad_norm": 1.7224546670913696, "learning_rate": 1.7672347178323217e-06, "loss": 0.7721, "step": 20726 }, { "epoch": 0.7321150254537748, "grad_norm": 1.7351830005645752, "learning_rate": 1.7667983703715703e-06, "loss": 0.8012, "step": 20727 }, { "epoch": 0.7321503472574827, "grad_norm": 1.6447412967681885, "learning_rate": 1.7663620652263535e-06, "loss": 0.7297, "step": 20728 }, { "epoch": 0.7321856690611906, "grad_norm": 2.1338276863098145, "learning_rate": 1.765925802402384e-06, "loss": 0.7773, "step": 20729 }, { "epoch": 0.7322209908648986, "grad_norm": 1.9599831104278564, "learning_rate": 1.765489581905368e-06, "loss": 0.7798, "step": 20730 }, { "epoch": 0.7322563126686065, "grad_norm": 1.6241440773010254, "learning_rate": 1.7650534037410182e-06, "loss": 0.7551, "step": 20731 }, { "epoch": 0.7322916344723144, "grad_norm": 1.806329607963562, "learning_rate": 1.7646172679150409e-06, "loss": 0.7756, "step": 20732 }, { "epoch": 0.7323269562760223, "grad_norm": 1.6986373662948608, "learning_rate": 1.7641811744331433e-06, "loss": 0.765, "step": 20733 }, { "epoch": 0.7323622780797301, "grad_norm": 1.6945406198501587, "learning_rate": 1.7637451233010339e-06, "loss": 0.7562, "step": 20734 }, { "epoch": 0.732397599883438, "grad_norm": 1.7411240339279175, "learning_rate": 1.7633091145244203e-06, "loss": 0.7472, "step": 20735 }, { "epoch": 0.7324329216871459, "grad_norm": 1.647639274597168, "learning_rate": 1.76287314810901e-06, "loss": 0.748, "step": 20736 }, { "epoch": 0.7324682434908538, "grad_norm": 1.7231453657150269, "learning_rate": 1.7624372240605059e-06, "loss": 0.7858, "step": 20737 }, { "epoch": 0.7325035652945617, "grad_norm": 1.5673459768295288, "learning_rate": 1.762001342384615e-06, "loss": 0.7587, "step": 20738 }, { "epoch": 0.7325388870982696, "grad_norm": 1.6755826473236084, "learning_rate": 1.7615655030870431e-06, "loss": 0.764, "step": 20739 }, { "epoch": 0.7325742089019776, "grad_norm": 1.8982988595962524, "learning_rate": 1.7611297061734918e-06, "loss": 0.7578, "step": 20740 }, { "epoch": 0.7326095307056855, "grad_norm": 1.7413312196731567, "learning_rate": 1.7606939516496663e-06, "loss": 0.7954, "step": 20741 }, { "epoch": 0.7326448525093934, "grad_norm": 1.6610956192016602, "learning_rate": 1.7602582395212704e-06, "loss": 0.779, "step": 20742 }, { "epoch": 0.7326801743131013, "grad_norm": 2.119725227355957, "learning_rate": 1.7598225697940048e-06, "loss": 0.7989, "step": 20743 }, { "epoch": 0.7327154961168092, "grad_norm": 1.7392077445983887, "learning_rate": 1.7593869424735716e-06, "loss": 0.7856, "step": 20744 }, { "epoch": 0.7327508179205171, "grad_norm": 2.7874956130981445, "learning_rate": 1.7589513575656747e-06, "loss": 0.7424, "step": 20745 }, { "epoch": 0.732786139724225, "grad_norm": 1.7255936861038208, "learning_rate": 1.7585158150760117e-06, "loss": 0.76, "step": 20746 }, { "epoch": 0.7328214615279329, "grad_norm": 1.8923726081848145, "learning_rate": 1.7580803150102843e-06, "loss": 0.847, "step": 20747 }, { "epoch": 0.7328567833316408, "grad_norm": 1.6211504936218262, "learning_rate": 1.7576448573741945e-06, "loss": 0.7529, "step": 20748 }, { "epoch": 0.7328921051353487, "grad_norm": 1.7472883462905884, "learning_rate": 1.7572094421734387e-06, "loss": 0.7837, "step": 20749 }, { "epoch": 0.7329274269390567, "grad_norm": 1.8421343564987183, "learning_rate": 1.7567740694137147e-06, "loss": 0.7642, "step": 20750 }, { "epoch": 0.7329627487427646, "grad_norm": 1.5089834928512573, "learning_rate": 1.756338739100722e-06, "loss": 0.7244, "step": 20751 }, { "epoch": 0.7329980705464725, "grad_norm": 1.6963692903518677, "learning_rate": 1.7559034512401596e-06, "loss": 0.7562, "step": 20752 }, { "epoch": 0.7330333923501804, "grad_norm": 1.899070143699646, "learning_rate": 1.7554682058377214e-06, "loss": 0.8048, "step": 20753 }, { "epoch": 0.7330687141538883, "grad_norm": 1.5985983610153198, "learning_rate": 1.7550330028991054e-06, "loss": 0.7586, "step": 20754 }, { "epoch": 0.7331040359575962, "grad_norm": 1.702760100364685, "learning_rate": 1.754597842430009e-06, "loss": 0.7791, "step": 20755 }, { "epoch": 0.7331393577613041, "grad_norm": 1.710121750831604, "learning_rate": 1.7541627244361247e-06, "loss": 0.796, "step": 20756 }, { "epoch": 0.733174679565012, "grad_norm": 1.81357741355896, "learning_rate": 1.7537276489231486e-06, "loss": 0.7719, "step": 20757 }, { "epoch": 0.7332100013687199, "grad_norm": 1.6694495677947998, "learning_rate": 1.753292615896776e-06, "loss": 0.7685, "step": 20758 }, { "epoch": 0.7332453231724279, "grad_norm": 1.6582448482513428, "learning_rate": 1.7528576253626983e-06, "loss": 0.7314, "step": 20759 }, { "epoch": 0.7332806449761357, "grad_norm": 1.5911601781845093, "learning_rate": 1.752422677326609e-06, "loss": 0.7725, "step": 20760 }, { "epoch": 0.7333159667798436, "grad_norm": 1.7960344552993774, "learning_rate": 1.751987771794203e-06, "loss": 0.7661, "step": 20761 }, { "epoch": 0.7333512885835515, "grad_norm": 1.674206018447876, "learning_rate": 1.751552908771169e-06, "loss": 0.7961, "step": 20762 }, { "epoch": 0.7333866103872594, "grad_norm": 1.9323976039886475, "learning_rate": 1.7511180882632e-06, "loss": 0.7898, "step": 20763 }, { "epoch": 0.7334219321909673, "grad_norm": 1.7130866050720215, "learning_rate": 1.750683310275988e-06, "loss": 0.7748, "step": 20764 }, { "epoch": 0.7334572539946752, "grad_norm": 1.8478941917419434, "learning_rate": 1.7502485748152203e-06, "loss": 0.7784, "step": 20765 }, { "epoch": 0.7334925757983831, "grad_norm": 1.6018779277801514, "learning_rate": 1.749813881886589e-06, "loss": 0.7407, "step": 20766 }, { "epoch": 0.733527897602091, "grad_norm": 0.9524805545806885, "learning_rate": 1.7493792314957841e-06, "loss": 0.5721, "step": 20767 }, { "epoch": 0.7335632194057989, "grad_norm": 1.6146520376205444, "learning_rate": 1.7489446236484925e-06, "loss": 0.7137, "step": 20768 }, { "epoch": 0.7335985412095068, "grad_norm": 1.6415339708328247, "learning_rate": 1.7485100583504011e-06, "loss": 0.7668, "step": 20769 }, { "epoch": 0.7336338630132148, "grad_norm": 1.7094241380691528, "learning_rate": 1.7480755356071989e-06, "loss": 0.7635, "step": 20770 }, { "epoch": 0.7336691848169227, "grad_norm": 1.609010100364685, "learning_rate": 1.7476410554245737e-06, "loss": 0.7402, "step": 20771 }, { "epoch": 0.7337045066206306, "grad_norm": 1.7253769636154175, "learning_rate": 1.7472066178082104e-06, "loss": 0.7844, "step": 20772 }, { "epoch": 0.7337398284243385, "grad_norm": 1.6564353704452515, "learning_rate": 1.7467722227637946e-06, "loss": 0.7457, "step": 20773 }, { "epoch": 0.7337751502280464, "grad_norm": 1.870538353919983, "learning_rate": 1.7463378702970147e-06, "loss": 0.7517, "step": 20774 }, { "epoch": 0.7338104720317543, "grad_norm": 0.8661381602287292, "learning_rate": 1.745903560413551e-06, "loss": 0.5666, "step": 20775 }, { "epoch": 0.7338457938354622, "grad_norm": 1.9366916418075562, "learning_rate": 1.7454692931190898e-06, "loss": 0.7712, "step": 20776 }, { "epoch": 0.7338811156391701, "grad_norm": 1.52235746383667, "learning_rate": 1.7450350684193168e-06, "loss": 0.7295, "step": 20777 }, { "epoch": 0.733916437442878, "grad_norm": 1.9035745859146118, "learning_rate": 1.744600886319911e-06, "loss": 0.7525, "step": 20778 }, { "epoch": 0.733951759246586, "grad_norm": 1.5891050100326538, "learning_rate": 1.7441667468265566e-06, "loss": 0.7553, "step": 20779 }, { "epoch": 0.7339870810502939, "grad_norm": 1.6615056991577148, "learning_rate": 1.743732649944938e-06, "loss": 0.7573, "step": 20780 }, { "epoch": 0.7340224028540018, "grad_norm": 1.619615077972412, "learning_rate": 1.7432985956807324e-06, "loss": 0.7678, "step": 20781 }, { "epoch": 0.7340577246577097, "grad_norm": 1.998688817024231, "learning_rate": 1.7428645840396224e-06, "loss": 0.7655, "step": 20782 }, { "epoch": 0.7340930464614176, "grad_norm": 1.655551552772522, "learning_rate": 1.742430615027289e-06, "loss": 0.7593, "step": 20783 }, { "epoch": 0.7341283682651255, "grad_norm": 1.695365071296692, "learning_rate": 1.7419966886494128e-06, "loss": 0.7418, "step": 20784 }, { "epoch": 0.7341636900688334, "grad_norm": 1.5873968601226807, "learning_rate": 1.7415628049116695e-06, "loss": 0.7201, "step": 20785 }, { "epoch": 0.7341990118725412, "grad_norm": 1.8410656452178955, "learning_rate": 1.7411289638197414e-06, "loss": 0.7836, "step": 20786 }, { "epoch": 0.7342343336762491, "grad_norm": 1.8972514867782593, "learning_rate": 1.7406951653793052e-06, "loss": 0.763, "step": 20787 }, { "epoch": 0.734269655479957, "grad_norm": 1.8307344913482666, "learning_rate": 1.7402614095960363e-06, "loss": 0.7672, "step": 20788 }, { "epoch": 0.734304977283665, "grad_norm": 1.6884725093841553, "learning_rate": 1.7398276964756127e-06, "loss": 0.7866, "step": 20789 }, { "epoch": 0.7343402990873729, "grad_norm": 1.5141751766204834, "learning_rate": 1.7393940260237135e-06, "loss": 0.7731, "step": 20790 }, { "epoch": 0.7343756208910808, "grad_norm": 1.7491027116775513, "learning_rate": 1.7389603982460107e-06, "loss": 0.7708, "step": 20791 }, { "epoch": 0.7344109426947887, "grad_norm": 1.619989037513733, "learning_rate": 1.7385268131481813e-06, "loss": 0.7694, "step": 20792 }, { "epoch": 0.7344462644984966, "grad_norm": 1.5960123538970947, "learning_rate": 1.7380932707359e-06, "loss": 0.7794, "step": 20793 }, { "epoch": 0.7344815863022045, "grad_norm": 1.7344447374343872, "learning_rate": 1.737659771014842e-06, "loss": 0.7808, "step": 20794 }, { "epoch": 0.7345169081059124, "grad_norm": 1.805977463722229, "learning_rate": 1.737226313990678e-06, "loss": 0.8325, "step": 20795 }, { "epoch": 0.7345522299096203, "grad_norm": 1.7395663261413574, "learning_rate": 1.736792899669083e-06, "loss": 0.7724, "step": 20796 }, { "epoch": 0.7345875517133282, "grad_norm": 2.0395920276641846, "learning_rate": 1.7363595280557311e-06, "loss": 0.7419, "step": 20797 }, { "epoch": 0.7346228735170361, "grad_norm": 1.759688377380371, "learning_rate": 1.73592619915629e-06, "loss": 0.7568, "step": 20798 }, { "epoch": 0.7346581953207441, "grad_norm": 1.7082206010818481, "learning_rate": 1.7354929129764332e-06, "loss": 0.7894, "step": 20799 }, { "epoch": 0.734693517124452, "grad_norm": 1.687851071357727, "learning_rate": 1.7350596695218331e-06, "loss": 0.7442, "step": 20800 }, { "epoch": 0.7347288389281599, "grad_norm": 4.470897197723389, "learning_rate": 1.7346264687981568e-06, "loss": 0.7943, "step": 20801 }, { "epoch": 0.7347641607318678, "grad_norm": 1.887230396270752, "learning_rate": 1.734193310811076e-06, "loss": 0.7552, "step": 20802 }, { "epoch": 0.7347994825355757, "grad_norm": 1.6586439609527588, "learning_rate": 1.7337601955662603e-06, "loss": 0.7957, "step": 20803 }, { "epoch": 0.7348348043392836, "grad_norm": 1.6190743446350098, "learning_rate": 1.7333271230693754e-06, "loss": 0.7882, "step": 20804 }, { "epoch": 0.7348701261429915, "grad_norm": 1.6680619716644287, "learning_rate": 1.7328940933260929e-06, "loss": 0.7881, "step": 20805 }, { "epoch": 0.7349054479466994, "grad_norm": 1.597181797027588, "learning_rate": 1.7324611063420765e-06, "loss": 0.7693, "step": 20806 }, { "epoch": 0.7349407697504073, "grad_norm": 1.71852445602417, "learning_rate": 1.7320281621229968e-06, "loss": 0.7884, "step": 20807 }, { "epoch": 0.7349760915541153, "grad_norm": 1.6553634405136108, "learning_rate": 1.7315952606745167e-06, "loss": 0.7669, "step": 20808 }, { "epoch": 0.7350114133578232, "grad_norm": 1.7577769756317139, "learning_rate": 1.7311624020023033e-06, "loss": 0.7507, "step": 20809 }, { "epoch": 0.7350467351615311, "grad_norm": 1.6719380617141724, "learning_rate": 1.7307295861120233e-06, "loss": 0.7445, "step": 20810 }, { "epoch": 0.735082056965239, "grad_norm": 1.7280704975128174, "learning_rate": 1.7302968130093383e-06, "loss": 0.7632, "step": 20811 }, { "epoch": 0.7351173787689468, "grad_norm": 1.556295394897461, "learning_rate": 1.7298640826999146e-06, "loss": 0.7643, "step": 20812 }, { "epoch": 0.7351527005726547, "grad_norm": 1.6577683687210083, "learning_rate": 1.729431395189416e-06, "loss": 0.7531, "step": 20813 }, { "epoch": 0.7351880223763626, "grad_norm": 1.767522931098938, "learning_rate": 1.7289987504835031e-06, "loss": 0.8016, "step": 20814 }, { "epoch": 0.7352233441800705, "grad_norm": 1.7155271768569946, "learning_rate": 1.7285661485878397e-06, "loss": 0.7797, "step": 20815 }, { "epoch": 0.7352586659837784, "grad_norm": 1.751991629600525, "learning_rate": 1.7281335895080891e-06, "loss": 0.7683, "step": 20816 }, { "epoch": 0.7352939877874863, "grad_norm": 1.9656546115875244, "learning_rate": 1.72770107324991e-06, "loss": 0.7592, "step": 20817 }, { "epoch": 0.7353293095911942, "grad_norm": 1.7346373796463013, "learning_rate": 1.7272685998189636e-06, "loss": 0.7718, "step": 20818 }, { "epoch": 0.7353646313949022, "grad_norm": 1.7706421613693237, "learning_rate": 1.7268361692209123e-06, "loss": 0.7571, "step": 20819 }, { "epoch": 0.7353999531986101, "grad_norm": 1.6735749244689941, "learning_rate": 1.726403781461412e-06, "loss": 0.7568, "step": 20820 }, { "epoch": 0.735435275002318, "grad_norm": 2.9775071144104004, "learning_rate": 1.7259714365461239e-06, "loss": 0.757, "step": 20821 }, { "epoch": 0.7354705968060259, "grad_norm": 1.7935187816619873, "learning_rate": 1.7255391344807076e-06, "loss": 0.7491, "step": 20822 }, { "epoch": 0.7355059186097338, "grad_norm": 1.8108108043670654, "learning_rate": 1.72510687527082e-06, "loss": 0.7845, "step": 20823 }, { "epoch": 0.7355412404134417, "grad_norm": 1.9249297380447388, "learning_rate": 1.7246746589221164e-06, "loss": 0.7583, "step": 20824 }, { "epoch": 0.7355765622171496, "grad_norm": 1.531324028968811, "learning_rate": 1.7242424854402545e-06, "loss": 0.7466, "step": 20825 }, { "epoch": 0.7356118840208575, "grad_norm": 1.046233892440796, "learning_rate": 1.7238103548308931e-06, "loss": 0.5761, "step": 20826 }, { "epoch": 0.7356472058245654, "grad_norm": 1.558721661567688, "learning_rate": 1.7233782670996847e-06, "loss": 0.7545, "step": 20827 }, { "epoch": 0.7356825276282734, "grad_norm": 1.754740595817566, "learning_rate": 1.7229462222522852e-06, "loss": 0.743, "step": 20828 }, { "epoch": 0.7357178494319813, "grad_norm": 3.5302252769470215, "learning_rate": 1.7225142202943513e-06, "loss": 0.7821, "step": 20829 }, { "epoch": 0.7357531712356892, "grad_norm": 1.626063585281372, "learning_rate": 1.7220822612315336e-06, "loss": 0.7554, "step": 20830 }, { "epoch": 0.7357884930393971, "grad_norm": 1.7085239887237549, "learning_rate": 1.7216503450694871e-06, "loss": 0.7812, "step": 20831 }, { "epoch": 0.735823814843105, "grad_norm": 1.61369788646698, "learning_rate": 1.7212184718138658e-06, "loss": 0.7527, "step": 20832 }, { "epoch": 0.7358591366468129, "grad_norm": 1.6627315282821655, "learning_rate": 1.72078664147032e-06, "loss": 0.7269, "step": 20833 }, { "epoch": 0.7358944584505208, "grad_norm": 1.581726312637329, "learning_rate": 1.7203548540445014e-06, "loss": 0.7654, "step": 20834 }, { "epoch": 0.7359297802542287, "grad_norm": 1.9383379220962524, "learning_rate": 1.7199231095420644e-06, "loss": 0.7872, "step": 20835 }, { "epoch": 0.7359651020579366, "grad_norm": 1.827879786491394, "learning_rate": 1.7194914079686547e-06, "loss": 0.7607, "step": 20836 }, { "epoch": 0.7360004238616445, "grad_norm": 1.670859694480896, "learning_rate": 1.7190597493299248e-06, "loss": 0.7824, "step": 20837 }, { "epoch": 0.7360357456653523, "grad_norm": 1.6970877647399902, "learning_rate": 1.7186281336315264e-06, "loss": 0.7958, "step": 20838 }, { "epoch": 0.7360710674690603, "grad_norm": 2.3187506198883057, "learning_rate": 1.7181965608791045e-06, "loss": 0.8052, "step": 20839 }, { "epoch": 0.7361063892727682, "grad_norm": 1.9449760913848877, "learning_rate": 1.717765031078309e-06, "loss": 0.7758, "step": 20840 }, { "epoch": 0.7361417110764761, "grad_norm": 1.0468581914901733, "learning_rate": 1.7173335442347888e-06, "loss": 0.5636, "step": 20841 }, { "epoch": 0.736177032880184, "grad_norm": 1.5573309659957886, "learning_rate": 1.7169021003541909e-06, "loss": 0.7662, "step": 20842 }, { "epoch": 0.7362123546838919, "grad_norm": 1.645012378692627, "learning_rate": 1.7164706994421594e-06, "loss": 0.7622, "step": 20843 }, { "epoch": 0.7362476764875998, "grad_norm": 1.7122026681900024, "learning_rate": 1.7160393415043415e-06, "loss": 0.7685, "step": 20844 }, { "epoch": 0.7362829982913077, "grad_norm": 1.6773643493652344, "learning_rate": 1.7156080265463854e-06, "loss": 0.8088, "step": 20845 }, { "epoch": 0.7363183200950156, "grad_norm": 1.6625146865844727, "learning_rate": 1.7151767545739329e-06, "loss": 0.7297, "step": 20846 }, { "epoch": 0.7363536418987235, "grad_norm": 1.705273985862732, "learning_rate": 1.7147455255926287e-06, "loss": 0.802, "step": 20847 }, { "epoch": 0.7363889637024315, "grad_norm": 1.6961911916732788, "learning_rate": 1.7143143396081198e-06, "loss": 0.7578, "step": 20848 }, { "epoch": 0.7364242855061394, "grad_norm": 1.6751905679702759, "learning_rate": 1.713883196626045e-06, "loss": 0.7576, "step": 20849 }, { "epoch": 0.7364596073098473, "grad_norm": 2.0559158325195312, "learning_rate": 1.7134520966520496e-06, "loss": 0.7697, "step": 20850 }, { "epoch": 0.7364949291135552, "grad_norm": 1.7907487154006958, "learning_rate": 1.7130210396917751e-06, "loss": 0.7957, "step": 20851 }, { "epoch": 0.7365302509172631, "grad_norm": 2.58808970451355, "learning_rate": 1.7125900257508653e-06, "loss": 0.8111, "step": 20852 }, { "epoch": 0.736565572720971, "grad_norm": 1.6483052968978882, "learning_rate": 1.7121590548349577e-06, "loss": 0.7468, "step": 20853 }, { "epoch": 0.7366008945246789, "grad_norm": 1.7533332109451294, "learning_rate": 1.7117281269496938e-06, "loss": 0.791, "step": 20854 }, { "epoch": 0.7366362163283868, "grad_norm": 1.7665997743606567, "learning_rate": 1.7112972421007162e-06, "loss": 0.7823, "step": 20855 }, { "epoch": 0.7366715381320947, "grad_norm": 1.6877429485321045, "learning_rate": 1.7108664002936603e-06, "loss": 0.7988, "step": 20856 }, { "epoch": 0.7367068599358026, "grad_norm": 1.6894617080688477, "learning_rate": 1.7104356015341668e-06, "loss": 0.7771, "step": 20857 }, { "epoch": 0.7367421817395106, "grad_norm": 2.0346662998199463, "learning_rate": 1.710004845827875e-06, "loss": 0.7918, "step": 20858 }, { "epoch": 0.7367775035432185, "grad_norm": 1.9381909370422363, "learning_rate": 1.70957413318042e-06, "loss": 0.7801, "step": 20859 }, { "epoch": 0.7368128253469264, "grad_norm": 1.6241198778152466, "learning_rate": 1.7091434635974419e-06, "loss": 0.7654, "step": 20860 }, { "epoch": 0.7368481471506343, "grad_norm": 1.6144094467163086, "learning_rate": 1.7087128370845734e-06, "loss": 0.7425, "step": 20861 }, { "epoch": 0.7368834689543422, "grad_norm": 1.8206162452697754, "learning_rate": 1.7082822536474547e-06, "loss": 0.7771, "step": 20862 }, { "epoch": 0.7369187907580501, "grad_norm": 1.7533080577850342, "learning_rate": 1.7078517132917178e-06, "loss": 0.7677, "step": 20863 }, { "epoch": 0.7369541125617579, "grad_norm": 1.5893594026565552, "learning_rate": 1.707421216022998e-06, "loss": 0.7626, "step": 20864 }, { "epoch": 0.7369894343654658, "grad_norm": 1.9070786237716675, "learning_rate": 1.7069907618469328e-06, "loss": 0.7927, "step": 20865 }, { "epoch": 0.7370247561691737, "grad_norm": 3.3291258811950684, "learning_rate": 1.7065603507691513e-06, "loss": 0.8032, "step": 20866 }, { "epoch": 0.7370600779728816, "grad_norm": 1.900243878364563, "learning_rate": 1.7061299827952887e-06, "loss": 0.7435, "step": 20867 }, { "epoch": 0.7370953997765896, "grad_norm": 0.9207072854042053, "learning_rate": 1.7056996579309798e-06, "loss": 0.5797, "step": 20868 }, { "epoch": 0.7371307215802975, "grad_norm": 1.6275099515914917, "learning_rate": 1.7052693761818528e-06, "loss": 0.7735, "step": 20869 }, { "epoch": 0.7371660433840054, "grad_norm": 1.6245981454849243, "learning_rate": 1.7048391375535406e-06, "loss": 0.7769, "step": 20870 }, { "epoch": 0.7372013651877133, "grad_norm": 1.719332218170166, "learning_rate": 1.7044089420516762e-06, "loss": 0.7698, "step": 20871 }, { "epoch": 0.7372366869914212, "grad_norm": 1.715174674987793, "learning_rate": 1.7039787896818867e-06, "loss": 0.7853, "step": 20872 }, { "epoch": 0.7372720087951291, "grad_norm": 2.0620503425598145, "learning_rate": 1.7035486804498025e-06, "loss": 0.7964, "step": 20873 }, { "epoch": 0.737307330598837, "grad_norm": 1.6902081966400146, "learning_rate": 1.7031186143610557e-06, "loss": 0.7394, "step": 20874 }, { "epoch": 0.7373426524025449, "grad_norm": 1.7386078834533691, "learning_rate": 1.702688591421271e-06, "loss": 0.7919, "step": 20875 }, { "epoch": 0.7373779742062528, "grad_norm": 1.632803201675415, "learning_rate": 1.7022586116360778e-06, "loss": 0.7529, "step": 20876 }, { "epoch": 0.7374132960099608, "grad_norm": 1.6807688474655151, "learning_rate": 1.7018286750111062e-06, "loss": 0.7641, "step": 20877 }, { "epoch": 0.7374486178136687, "grad_norm": 1.662078619003296, "learning_rate": 1.7013987815519805e-06, "loss": 0.7525, "step": 20878 }, { "epoch": 0.7374839396173766, "grad_norm": 1.7498849630355835, "learning_rate": 1.7009689312643263e-06, "loss": 0.8218, "step": 20879 }, { "epoch": 0.7375192614210845, "grad_norm": 1.7643470764160156, "learning_rate": 1.70053912415377e-06, "loss": 0.7874, "step": 20880 }, { "epoch": 0.7375545832247924, "grad_norm": 1.8951057195663452, "learning_rate": 1.7001093602259394e-06, "loss": 0.7828, "step": 20881 }, { "epoch": 0.7375899050285003, "grad_norm": 1.6606471538543701, "learning_rate": 1.6996796394864556e-06, "loss": 0.7801, "step": 20882 }, { "epoch": 0.7376252268322082, "grad_norm": 1.638418197631836, "learning_rate": 1.6992499619409442e-06, "loss": 0.8, "step": 20883 }, { "epoch": 0.7376605486359161, "grad_norm": 0.9129412174224854, "learning_rate": 1.6988203275950304e-06, "loss": 0.5811, "step": 20884 }, { "epoch": 0.737695870439624, "grad_norm": 2.0842833518981934, "learning_rate": 1.698390736454334e-06, "loss": 0.7572, "step": 20885 }, { "epoch": 0.737731192243332, "grad_norm": 1.0058364868164062, "learning_rate": 1.697961188524479e-06, "loss": 0.5801, "step": 20886 }, { "epoch": 0.7377665140470399, "grad_norm": 1.5523114204406738, "learning_rate": 1.697531683811089e-06, "loss": 0.7562, "step": 20887 }, { "epoch": 0.7378018358507478, "grad_norm": 0.9424853920936584, "learning_rate": 1.6971022223197814e-06, "loss": 0.564, "step": 20888 }, { "epoch": 0.7378371576544557, "grad_norm": 1.7797188758850098, "learning_rate": 1.6966728040561797e-06, "loss": 0.8132, "step": 20889 }, { "epoch": 0.7378724794581635, "grad_norm": 1.6849603652954102, "learning_rate": 1.6962434290259044e-06, "loss": 0.7689, "step": 20890 }, { "epoch": 0.7379078012618714, "grad_norm": 1.550194263458252, "learning_rate": 1.6958140972345728e-06, "loss": 0.7633, "step": 20891 }, { "epoch": 0.7379431230655793, "grad_norm": 1.764522910118103, "learning_rate": 1.695384808687805e-06, "loss": 0.7993, "step": 20892 }, { "epoch": 0.7379784448692872, "grad_norm": 1.8251886367797852, "learning_rate": 1.6949555633912218e-06, "loss": 0.7951, "step": 20893 }, { "epoch": 0.7380137666729951, "grad_norm": 4.251013278961182, "learning_rate": 1.6945263613504364e-06, "loss": 0.7823, "step": 20894 }, { "epoch": 0.738049088476703, "grad_norm": 1.5471606254577637, "learning_rate": 1.6940972025710694e-06, "loss": 0.724, "step": 20895 }, { "epoch": 0.7380844102804109, "grad_norm": 1.7212198972702026, "learning_rate": 1.693668087058738e-06, "loss": 0.7925, "step": 20896 }, { "epoch": 0.7381197320841189, "grad_norm": 1.6480377912521362, "learning_rate": 1.6932390148190576e-06, "loss": 0.7799, "step": 20897 }, { "epoch": 0.7381550538878268, "grad_norm": 1.9930760860443115, "learning_rate": 1.6928099858576408e-06, "loss": 0.7681, "step": 20898 }, { "epoch": 0.7381903756915347, "grad_norm": 1.8156967163085938, "learning_rate": 1.692381000180106e-06, "loss": 0.8029, "step": 20899 }, { "epoch": 0.7382256974952426, "grad_norm": 1.6221457719802856, "learning_rate": 1.6919520577920685e-06, "loss": 0.7588, "step": 20900 }, { "epoch": 0.7382610192989505, "grad_norm": 1.6482040882110596, "learning_rate": 1.6915231586991388e-06, "loss": 0.7502, "step": 20901 }, { "epoch": 0.7382963411026584, "grad_norm": 1.6536611318588257, "learning_rate": 1.691094302906932e-06, "loss": 0.7422, "step": 20902 }, { "epoch": 0.7383316629063663, "grad_norm": 1.8085829019546509, "learning_rate": 1.690665490421063e-06, "loss": 0.8133, "step": 20903 }, { "epoch": 0.7383669847100742, "grad_norm": 1.7651731967926025, "learning_rate": 1.69023672124714e-06, "loss": 0.7773, "step": 20904 }, { "epoch": 0.7384023065137821, "grad_norm": 2.196056842803955, "learning_rate": 1.6898079953907765e-06, "loss": 0.804, "step": 20905 }, { "epoch": 0.73843762831749, "grad_norm": 1.6187782287597656, "learning_rate": 1.6893793128575853e-06, "loss": 0.7586, "step": 20906 }, { "epoch": 0.738472950121198, "grad_norm": 1.5578148365020752, "learning_rate": 1.6889506736531742e-06, "loss": 0.7951, "step": 20907 }, { "epoch": 0.7385082719249059, "grad_norm": 1.816419243812561, "learning_rate": 1.6885220777831535e-06, "loss": 0.7488, "step": 20908 }, { "epoch": 0.7385435937286138, "grad_norm": 1.6310065984725952, "learning_rate": 1.6880935252531338e-06, "loss": 0.7725, "step": 20909 }, { "epoch": 0.7385789155323217, "grad_norm": 1.6144793033599854, "learning_rate": 1.6876650160687252e-06, "loss": 0.7721, "step": 20910 }, { "epoch": 0.7386142373360296, "grad_norm": 1.7146493196487427, "learning_rate": 1.6872365502355325e-06, "loss": 0.7742, "step": 20911 }, { "epoch": 0.7386495591397375, "grad_norm": 2.027956008911133, "learning_rate": 1.6868081277591652e-06, "loss": 0.766, "step": 20912 }, { "epoch": 0.7386848809434454, "grad_norm": 1.9832825660705566, "learning_rate": 1.6863797486452317e-06, "loss": 0.7747, "step": 20913 }, { "epoch": 0.7387202027471533, "grad_norm": 1.6737147569656372, "learning_rate": 1.685951412899336e-06, "loss": 0.7689, "step": 20914 }, { "epoch": 0.7387555245508612, "grad_norm": 1.611214518547058, "learning_rate": 1.6855231205270862e-06, "loss": 0.7776, "step": 20915 }, { "epoch": 0.738790846354569, "grad_norm": 1.7406054735183716, "learning_rate": 1.685094871534087e-06, "loss": 0.7625, "step": 20916 }, { "epoch": 0.738826168158277, "grad_norm": 1.7232890129089355, "learning_rate": 1.6846666659259415e-06, "loss": 0.7724, "step": 20917 }, { "epoch": 0.7388614899619849, "grad_norm": 1.8167823553085327, "learning_rate": 1.6842385037082554e-06, "loss": 0.7928, "step": 20918 }, { "epoch": 0.7388968117656928, "grad_norm": 1.6564297676086426, "learning_rate": 1.6838103848866326e-06, "loss": 0.7495, "step": 20919 }, { "epoch": 0.7389321335694007, "grad_norm": 1.5076812505722046, "learning_rate": 1.6833823094666774e-06, "loss": 0.7273, "step": 20920 }, { "epoch": 0.7389674553731086, "grad_norm": 5.066958427429199, "learning_rate": 1.6829542774539898e-06, "loss": 0.7524, "step": 20921 }, { "epoch": 0.7390027771768165, "grad_norm": 1.6244101524353027, "learning_rate": 1.6825262888541733e-06, "loss": 0.7532, "step": 20922 }, { "epoch": 0.7390380989805244, "grad_norm": 1.7995634078979492, "learning_rate": 1.68209834367283e-06, "loss": 0.7538, "step": 20923 }, { "epoch": 0.7390734207842323, "grad_norm": 1.6913527250289917, "learning_rate": 1.681670441915559e-06, "loss": 0.7591, "step": 20924 }, { "epoch": 0.7391087425879402, "grad_norm": 2.245727300643921, "learning_rate": 1.6812425835879608e-06, "loss": 0.7842, "step": 20925 }, { "epoch": 0.7391440643916481, "grad_norm": 1.6449296474456787, "learning_rate": 1.6808147686956377e-06, "loss": 0.7497, "step": 20926 }, { "epoch": 0.7391793861953561, "grad_norm": 1.7202268838882446, "learning_rate": 1.6803869972441856e-06, "loss": 0.7675, "step": 20927 }, { "epoch": 0.739214707999064, "grad_norm": 1.724634051322937, "learning_rate": 1.6799592692392042e-06, "loss": 0.7715, "step": 20928 }, { "epoch": 0.7392500298027719, "grad_norm": 2.038872003555298, "learning_rate": 1.679531584686293e-06, "loss": 0.7873, "step": 20929 }, { "epoch": 0.7392853516064798, "grad_norm": 1.732814073562622, "learning_rate": 1.6791039435910473e-06, "loss": 0.7951, "step": 20930 }, { "epoch": 0.7393206734101877, "grad_norm": 1.6656739711761475, "learning_rate": 1.6786763459590649e-06, "loss": 0.7498, "step": 20931 }, { "epoch": 0.7393559952138956, "grad_norm": 1.592832088470459, "learning_rate": 1.6782487917959435e-06, "loss": 0.777, "step": 20932 }, { "epoch": 0.7393913170176035, "grad_norm": 1.8535740375518799, "learning_rate": 1.6778212811072757e-06, "loss": 0.777, "step": 20933 }, { "epoch": 0.7394266388213114, "grad_norm": 2.699174404144287, "learning_rate": 1.6773938138986606e-06, "loss": 0.7733, "step": 20934 }, { "epoch": 0.7394619606250193, "grad_norm": 1.712082862854004, "learning_rate": 1.6769663901756889e-06, "loss": 0.7871, "step": 20935 }, { "epoch": 0.7394972824287273, "grad_norm": 1.7020922899246216, "learning_rate": 1.6765390099439578e-06, "loss": 0.7698, "step": 20936 }, { "epoch": 0.7395326042324352, "grad_norm": 1.6896963119506836, "learning_rate": 1.6761116732090577e-06, "loss": 0.7776, "step": 20937 }, { "epoch": 0.7395679260361431, "grad_norm": 1.663320541381836, "learning_rate": 1.6756843799765838e-06, "loss": 0.7901, "step": 20938 }, { "epoch": 0.739603247839851, "grad_norm": 1.856939673423767, "learning_rate": 1.6752571302521287e-06, "loss": 0.7768, "step": 20939 }, { "epoch": 0.7396385696435589, "grad_norm": 1.694900631904602, "learning_rate": 1.6748299240412824e-06, "loss": 0.7586, "step": 20940 }, { "epoch": 0.7396738914472668, "grad_norm": 1.7033863067626953, "learning_rate": 1.6744027613496367e-06, "loss": 0.7942, "step": 20941 }, { "epoch": 0.7397092132509746, "grad_norm": 2.3178045749664307, "learning_rate": 1.6739756421827835e-06, "loss": 0.7473, "step": 20942 }, { "epoch": 0.7397445350546825, "grad_norm": 3.5406012535095215, "learning_rate": 1.6735485665463108e-06, "loss": 0.7447, "step": 20943 }, { "epoch": 0.7397798568583904, "grad_norm": 1.7042587995529175, "learning_rate": 1.6731215344458096e-06, "loss": 0.7559, "step": 20944 }, { "epoch": 0.7398151786620983, "grad_norm": 2.3205454349517822, "learning_rate": 1.6726945458868692e-06, "loss": 0.7802, "step": 20945 }, { "epoch": 0.7398505004658062, "grad_norm": 1.771243691444397, "learning_rate": 1.672267600875076e-06, "loss": 0.7723, "step": 20946 }, { "epoch": 0.7398858222695142, "grad_norm": 1.745486855506897, "learning_rate": 1.6718406994160185e-06, "loss": 0.8069, "step": 20947 }, { "epoch": 0.7399211440732221, "grad_norm": 1.8766120672225952, "learning_rate": 1.6714138415152865e-06, "loss": 0.7744, "step": 20948 }, { "epoch": 0.73995646587693, "grad_norm": 1.6366283893585205, "learning_rate": 1.6709870271784622e-06, "loss": 0.773, "step": 20949 }, { "epoch": 0.7399917876806379, "grad_norm": 1.711012601852417, "learning_rate": 1.6705602564111345e-06, "loss": 0.7581, "step": 20950 }, { "epoch": 0.7400271094843458, "grad_norm": 1.805686593055725, "learning_rate": 1.6701335292188897e-06, "loss": 0.783, "step": 20951 }, { "epoch": 0.7400624312880537, "grad_norm": 1.5546430349349976, "learning_rate": 1.6697068456073112e-06, "loss": 0.7794, "step": 20952 }, { "epoch": 0.7400977530917616, "grad_norm": 2.092271566390991, "learning_rate": 1.6692802055819819e-06, "loss": 0.7756, "step": 20953 }, { "epoch": 0.7401330748954695, "grad_norm": 1.9381438493728638, "learning_rate": 1.668853609148487e-06, "loss": 0.7889, "step": 20954 }, { "epoch": 0.7401683966991774, "grad_norm": 1.849792242050171, "learning_rate": 1.6684270563124117e-06, "loss": 0.7526, "step": 20955 }, { "epoch": 0.7402037185028854, "grad_norm": 1.7415632009506226, "learning_rate": 1.6680005470793354e-06, "loss": 0.8035, "step": 20956 }, { "epoch": 0.7402390403065933, "grad_norm": 1.747917652130127, "learning_rate": 1.6675740814548413e-06, "loss": 0.7918, "step": 20957 }, { "epoch": 0.7402743621103012, "grad_norm": 1.9441550970077515, "learning_rate": 1.6671476594445124e-06, "loss": 0.7677, "step": 20958 }, { "epoch": 0.7403096839140091, "grad_norm": 1.6016770601272583, "learning_rate": 1.6667212810539275e-06, "loss": 0.7725, "step": 20959 }, { "epoch": 0.740345005717717, "grad_norm": 1.5877219438552856, "learning_rate": 1.6662949462886675e-06, "loss": 0.7742, "step": 20960 }, { "epoch": 0.7403803275214249, "grad_norm": 1.5625783205032349, "learning_rate": 1.6658686551543147e-06, "loss": 0.719, "step": 20961 }, { "epoch": 0.7404156493251328, "grad_norm": 2.0170345306396484, "learning_rate": 1.6654424076564441e-06, "loss": 0.7634, "step": 20962 }, { "epoch": 0.7404509711288407, "grad_norm": 1.741851806640625, "learning_rate": 1.6650162038006362e-06, "loss": 0.7849, "step": 20963 }, { "epoch": 0.7404862929325486, "grad_norm": 1.7326107025146484, "learning_rate": 1.6645900435924717e-06, "loss": 0.7883, "step": 20964 }, { "epoch": 0.7405216147362566, "grad_norm": 3.695119619369507, "learning_rate": 1.6641639270375238e-06, "loss": 0.7509, "step": 20965 }, { "epoch": 0.7405569365399645, "grad_norm": 1.748551845550537, "learning_rate": 1.6637378541413712e-06, "loss": 0.7624, "step": 20966 }, { "epoch": 0.7405922583436724, "grad_norm": 1.584970474243164, "learning_rate": 1.6633118249095903e-06, "loss": 0.7857, "step": 20967 }, { "epoch": 0.7406275801473802, "grad_norm": 1.6667033433914185, "learning_rate": 1.6628858393477587e-06, "loss": 0.786, "step": 20968 }, { "epoch": 0.7406629019510881, "grad_norm": 1.8111157417297363, "learning_rate": 1.6624598974614486e-06, "loss": 0.8051, "step": 20969 }, { "epoch": 0.740698223754796, "grad_norm": 1.6213098764419556, "learning_rate": 1.6620339992562356e-06, "loss": 0.7456, "step": 20970 }, { "epoch": 0.7407335455585039, "grad_norm": 1.6826286315917969, "learning_rate": 1.6616081447376975e-06, "loss": 0.7616, "step": 20971 }, { "epoch": 0.7407688673622118, "grad_norm": 1.7955540418624878, "learning_rate": 1.6611823339114014e-06, "loss": 0.7709, "step": 20972 }, { "epoch": 0.7408041891659197, "grad_norm": 1.6794058084487915, "learning_rate": 1.6607565667829234e-06, "loss": 0.7687, "step": 20973 }, { "epoch": 0.7408395109696276, "grad_norm": 1.9499915838241577, "learning_rate": 1.6603308433578369e-06, "loss": 0.7936, "step": 20974 }, { "epoch": 0.7408748327733355, "grad_norm": 1.7040139436721802, "learning_rate": 1.6599051636417113e-06, "loss": 0.7637, "step": 20975 }, { "epoch": 0.7409101545770435, "grad_norm": 1.694837212562561, "learning_rate": 1.6594795276401188e-06, "loss": 0.7713, "step": 20976 }, { "epoch": 0.7409454763807514, "grad_norm": 1.6396523714065552, "learning_rate": 1.6590539353586304e-06, "loss": 0.8346, "step": 20977 }, { "epoch": 0.7409807981844593, "grad_norm": 1.5425399541854858, "learning_rate": 1.6586283868028176e-06, "loss": 0.7509, "step": 20978 }, { "epoch": 0.7410161199881672, "grad_norm": 1.6920572519302368, "learning_rate": 1.658202881978247e-06, "loss": 0.7415, "step": 20979 }, { "epoch": 0.7410514417918751, "grad_norm": 1.815271258354187, "learning_rate": 1.6577774208904884e-06, "loss": 0.7528, "step": 20980 }, { "epoch": 0.741086763595583, "grad_norm": 1.8226497173309326, "learning_rate": 1.6573520035451124e-06, "loss": 0.8002, "step": 20981 }, { "epoch": 0.7411220853992909, "grad_norm": 1.59593665599823, "learning_rate": 1.6569266299476839e-06, "loss": 0.8, "step": 20982 }, { "epoch": 0.7411574072029988, "grad_norm": 1.9070863723754883, "learning_rate": 1.6565013001037704e-06, "loss": 0.7585, "step": 20983 }, { "epoch": 0.7411927290067067, "grad_norm": 1.7866530418395996, "learning_rate": 1.656076014018942e-06, "loss": 0.7684, "step": 20984 }, { "epoch": 0.7412280508104147, "grad_norm": 2.0286917686462402, "learning_rate": 1.6556507716987597e-06, "loss": 0.7622, "step": 20985 }, { "epoch": 0.7412633726141226, "grad_norm": 1.785762906074524, "learning_rate": 1.655225573148792e-06, "loss": 0.7707, "step": 20986 }, { "epoch": 0.7412986944178305, "grad_norm": 1.5303386449813843, "learning_rate": 1.6548004183746046e-06, "loss": 0.7875, "step": 20987 }, { "epoch": 0.7413340162215384, "grad_norm": 1.796269178390503, "learning_rate": 1.6543753073817587e-06, "loss": 0.767, "step": 20988 }, { "epoch": 0.7413693380252463, "grad_norm": 1.8645762205123901, "learning_rate": 1.6539502401758217e-06, "loss": 0.8253, "step": 20989 }, { "epoch": 0.7414046598289542, "grad_norm": 1.5869990587234497, "learning_rate": 1.6535252167623533e-06, "loss": 0.7636, "step": 20990 }, { "epoch": 0.7414399816326621, "grad_norm": 1.6564322710037231, "learning_rate": 1.6531002371469196e-06, "loss": 0.7452, "step": 20991 }, { "epoch": 0.74147530343637, "grad_norm": 2.3865997791290283, "learning_rate": 1.6526753013350794e-06, "loss": 0.7616, "step": 20992 }, { "epoch": 0.7415106252400779, "grad_norm": 1.5299023389816284, "learning_rate": 1.6522504093323948e-06, "loss": 0.7767, "step": 20993 }, { "epoch": 0.7415459470437857, "grad_norm": 1.7057533264160156, "learning_rate": 1.65182556114443e-06, "loss": 0.7807, "step": 20994 }, { "epoch": 0.7415812688474936, "grad_norm": 1.7771196365356445, "learning_rate": 1.651400756776741e-06, "loss": 0.7409, "step": 20995 }, { "epoch": 0.7416165906512016, "grad_norm": 1.6272296905517578, "learning_rate": 1.6509759962348893e-06, "loss": 0.7616, "step": 20996 }, { "epoch": 0.7416519124549095, "grad_norm": 1.628491997718811, "learning_rate": 1.6505512795244355e-06, "loss": 0.8167, "step": 20997 }, { "epoch": 0.7416872342586174, "grad_norm": 1.6783190965652466, "learning_rate": 1.6501266066509358e-06, "loss": 0.7524, "step": 20998 }, { "epoch": 0.7417225560623253, "grad_norm": 1.8894283771514893, "learning_rate": 1.649701977619949e-06, "loss": 0.7886, "step": 20999 }, { "epoch": 0.7417578778660332, "grad_norm": 1.7554222345352173, "learning_rate": 1.6492773924370353e-06, "loss": 0.7546, "step": 21000 }, { "epoch": 0.7417931996697411, "grad_norm": 1.8763989210128784, "learning_rate": 1.6488528511077468e-06, "loss": 0.7738, "step": 21001 }, { "epoch": 0.741828521473449, "grad_norm": 1.7294899225234985, "learning_rate": 1.6484283536376428e-06, "loss": 0.7868, "step": 21002 }, { "epoch": 0.7418638432771569, "grad_norm": 1.6239502429962158, "learning_rate": 1.6480039000322795e-06, "loss": 0.7783, "step": 21003 }, { "epoch": 0.7418991650808648, "grad_norm": 1.777966856956482, "learning_rate": 1.6475794902972097e-06, "loss": 0.7804, "step": 21004 }, { "epoch": 0.7419344868845728, "grad_norm": 1.7934975624084473, "learning_rate": 1.6471551244379897e-06, "loss": 0.7686, "step": 21005 }, { "epoch": 0.7419698086882807, "grad_norm": 1.9047739505767822, "learning_rate": 1.6467308024601748e-06, "loss": 0.7765, "step": 21006 }, { "epoch": 0.7420051304919886, "grad_norm": 1.8187592029571533, "learning_rate": 1.6463065243693144e-06, "loss": 0.7794, "step": 21007 }, { "epoch": 0.7420404522956965, "grad_norm": 1.7076324224472046, "learning_rate": 1.6458822901709659e-06, "loss": 0.7671, "step": 21008 }, { "epoch": 0.7420757740994044, "grad_norm": 1.8246760368347168, "learning_rate": 1.6454580998706782e-06, "loss": 0.7481, "step": 21009 }, { "epoch": 0.7421110959031123, "grad_norm": 1.6739870309829712, "learning_rate": 1.6450339534740057e-06, "loss": 0.7819, "step": 21010 }, { "epoch": 0.7421464177068202, "grad_norm": 1.6150192022323608, "learning_rate": 1.6446098509864961e-06, "loss": 0.7845, "step": 21011 }, { "epoch": 0.7421817395105281, "grad_norm": 1.793284296989441, "learning_rate": 1.6441857924137023e-06, "loss": 0.7855, "step": 21012 }, { "epoch": 0.742217061314236, "grad_norm": 1.9554369449615479, "learning_rate": 1.6437617777611758e-06, "loss": 0.7262, "step": 21013 }, { "epoch": 0.742252383117944, "grad_norm": 1.6297868490219116, "learning_rate": 1.6433378070344624e-06, "loss": 0.7376, "step": 21014 }, { "epoch": 0.7422877049216519, "grad_norm": 1.8873225450515747, "learning_rate": 1.6429138802391127e-06, "loss": 0.75, "step": 21015 }, { "epoch": 0.7423230267253598, "grad_norm": 1.8532909154891968, "learning_rate": 1.642489997380677e-06, "loss": 0.772, "step": 21016 }, { "epoch": 0.7423583485290677, "grad_norm": 1.5368510484695435, "learning_rate": 1.6420661584646996e-06, "loss": 0.7518, "step": 21017 }, { "epoch": 0.7423936703327756, "grad_norm": 1.5992012023925781, "learning_rate": 1.6416423634967288e-06, "loss": 0.7768, "step": 21018 }, { "epoch": 0.7424289921364835, "grad_norm": 1.8331172466278076, "learning_rate": 1.641218612482313e-06, "loss": 0.8208, "step": 21019 }, { "epoch": 0.7424643139401913, "grad_norm": 1.9042092561721802, "learning_rate": 1.640794905426995e-06, "loss": 0.7856, "step": 21020 }, { "epoch": 0.7424996357438992, "grad_norm": 1.6136897802352905, "learning_rate": 1.6403712423363221e-06, "loss": 0.7775, "step": 21021 }, { "epoch": 0.7425349575476071, "grad_norm": 1.7057511806488037, "learning_rate": 1.6399476232158401e-06, "loss": 0.7722, "step": 21022 }, { "epoch": 0.742570279351315, "grad_norm": 1.704655408859253, "learning_rate": 1.6395240480710905e-06, "loss": 0.7698, "step": 21023 }, { "epoch": 0.7426056011550229, "grad_norm": 2.727060556411743, "learning_rate": 1.6391005169076185e-06, "loss": 0.7429, "step": 21024 }, { "epoch": 0.7426409229587309, "grad_norm": 1.7299737930297852, "learning_rate": 1.6386770297309673e-06, "loss": 0.7399, "step": 21025 }, { "epoch": 0.7426762447624388, "grad_norm": 1.9888837337493896, "learning_rate": 1.6382535865466825e-06, "loss": 0.7521, "step": 21026 }, { "epoch": 0.7427115665661467, "grad_norm": 1.994840145111084, "learning_rate": 1.6378301873602992e-06, "loss": 0.765, "step": 21027 }, { "epoch": 0.7427468883698546, "grad_norm": 1.6687514781951904, "learning_rate": 1.6374068321773628e-06, "loss": 0.7692, "step": 21028 }, { "epoch": 0.7427822101735625, "grad_norm": 1.6880029439926147, "learning_rate": 1.6369835210034152e-06, "loss": 0.7613, "step": 21029 }, { "epoch": 0.7428175319772704, "grad_norm": 1.6404372453689575, "learning_rate": 1.6365602538439934e-06, "loss": 0.7353, "step": 21030 }, { "epoch": 0.7428528537809783, "grad_norm": 1.7259700298309326, "learning_rate": 1.6361370307046386e-06, "loss": 0.7377, "step": 21031 }, { "epoch": 0.7428881755846862, "grad_norm": 2.233813762664795, "learning_rate": 1.6357138515908921e-06, "loss": 0.7561, "step": 21032 }, { "epoch": 0.7429234973883941, "grad_norm": 1.6556110382080078, "learning_rate": 1.6352907165082882e-06, "loss": 0.7887, "step": 21033 }, { "epoch": 0.742958819192102, "grad_norm": 1.8150882720947266, "learning_rate": 1.634867625462367e-06, "loss": 0.7867, "step": 21034 }, { "epoch": 0.74299414099581, "grad_norm": 1.6869815587997437, "learning_rate": 1.634444578458666e-06, "loss": 0.7296, "step": 21035 }, { "epoch": 0.7430294627995179, "grad_norm": 1.6860101222991943, "learning_rate": 1.6340215755027227e-06, "loss": 0.748, "step": 21036 }, { "epoch": 0.7430647846032258, "grad_norm": 1.8434782028198242, "learning_rate": 1.6335986166000711e-06, "loss": 0.777, "step": 21037 }, { "epoch": 0.7431001064069337, "grad_norm": 1.6025160551071167, "learning_rate": 1.6331757017562483e-06, "loss": 0.7728, "step": 21038 }, { "epoch": 0.7431354282106416, "grad_norm": 1.6827119588851929, "learning_rate": 1.6327528309767904e-06, "loss": 0.8015, "step": 21039 }, { "epoch": 0.7431707500143495, "grad_norm": 1.5048346519470215, "learning_rate": 1.6323300042672285e-06, "loss": 0.7463, "step": 21040 }, { "epoch": 0.7432060718180574, "grad_norm": 1.8012034893035889, "learning_rate": 1.631907221633099e-06, "loss": 0.7931, "step": 21041 }, { "epoch": 0.7432413936217653, "grad_norm": 1.9448893070220947, "learning_rate": 1.6314844830799359e-06, "loss": 0.7368, "step": 21042 }, { "epoch": 0.7432767154254732, "grad_norm": 1.8457704782485962, "learning_rate": 1.631061788613269e-06, "loss": 0.7775, "step": 21043 }, { "epoch": 0.7433120372291812, "grad_norm": 1.611696481704712, "learning_rate": 1.6306391382386323e-06, "loss": 0.8028, "step": 21044 }, { "epoch": 0.7433473590328891, "grad_norm": 1.8559483289718628, "learning_rate": 1.6302165319615588e-06, "loss": 0.7864, "step": 21045 }, { "epoch": 0.743382680836597, "grad_norm": 1.779064416885376, "learning_rate": 1.629793969787578e-06, "loss": 0.7932, "step": 21046 }, { "epoch": 0.7434180026403048, "grad_norm": 1.7027692794799805, "learning_rate": 1.6293714517222187e-06, "loss": 0.758, "step": 21047 }, { "epoch": 0.7434533244440127, "grad_norm": 1.5390135049819946, "learning_rate": 1.6289489777710116e-06, "loss": 0.7378, "step": 21048 }, { "epoch": 0.7434886462477206, "grad_norm": 1.635872721672058, "learning_rate": 1.6285265479394885e-06, "loss": 0.7636, "step": 21049 }, { "epoch": 0.7435239680514285, "grad_norm": 1.6676818132400513, "learning_rate": 1.628104162233175e-06, "loss": 0.7713, "step": 21050 }, { "epoch": 0.7435592898551364, "grad_norm": 1.6299442052841187, "learning_rate": 1.6276818206576e-06, "loss": 0.7538, "step": 21051 }, { "epoch": 0.7435946116588443, "grad_norm": 1.540157437324524, "learning_rate": 1.627259523218293e-06, "loss": 0.745, "step": 21052 }, { "epoch": 0.7436299334625522, "grad_norm": 1.6871533393859863, "learning_rate": 1.626837269920778e-06, "loss": 0.754, "step": 21053 }, { "epoch": 0.7436652552662602, "grad_norm": 1.8782514333724976, "learning_rate": 1.6264150607705819e-06, "loss": 0.781, "step": 21054 }, { "epoch": 0.7437005770699681, "grad_norm": 1.7756201028823853, "learning_rate": 1.625992895773234e-06, "loss": 0.7755, "step": 21055 }, { "epoch": 0.743735898873676, "grad_norm": 1.7697964906692505, "learning_rate": 1.6255707749342547e-06, "loss": 0.7971, "step": 21056 }, { "epoch": 0.7437712206773839, "grad_norm": 2.179746627807617, "learning_rate": 1.6251486982591709e-06, "loss": 0.8131, "step": 21057 }, { "epoch": 0.7438065424810918, "grad_norm": 1.8201324939727783, "learning_rate": 1.6247266657535077e-06, "loss": 0.771, "step": 21058 }, { "epoch": 0.7438418642847997, "grad_norm": 1.8013652563095093, "learning_rate": 1.6243046774227867e-06, "loss": 0.762, "step": 21059 }, { "epoch": 0.7438771860885076, "grad_norm": 1.728254795074463, "learning_rate": 1.6238827332725305e-06, "loss": 0.7717, "step": 21060 }, { "epoch": 0.7439125078922155, "grad_norm": 1.6762064695358276, "learning_rate": 1.6234608333082646e-06, "loss": 0.7644, "step": 21061 }, { "epoch": 0.7439478296959234, "grad_norm": 1.7557823657989502, "learning_rate": 1.6230389775355071e-06, "loss": 0.7727, "step": 21062 }, { "epoch": 0.7439831514996313, "grad_norm": 1.635926604270935, "learning_rate": 1.6226171659597822e-06, "loss": 0.7555, "step": 21063 }, { "epoch": 0.7440184733033393, "grad_norm": 1.6867060661315918, "learning_rate": 1.6221953985866073e-06, "loss": 0.7441, "step": 21064 }, { "epoch": 0.7440537951070472, "grad_norm": 2.143954038619995, "learning_rate": 1.6217736754215058e-06, "loss": 0.7744, "step": 21065 }, { "epoch": 0.7440891169107551, "grad_norm": 1.745662808418274, "learning_rate": 1.6213519964699936e-06, "loss": 0.7668, "step": 21066 }, { "epoch": 0.744124438714463, "grad_norm": 1.7477154731750488, "learning_rate": 1.620930361737591e-06, "loss": 0.7551, "step": 21067 }, { "epoch": 0.7441597605181709, "grad_norm": 1.874163269996643, "learning_rate": 1.620508771229819e-06, "loss": 0.7702, "step": 21068 }, { "epoch": 0.7441950823218788, "grad_norm": 1.6320009231567383, "learning_rate": 1.620087224952191e-06, "loss": 0.7861, "step": 21069 }, { "epoch": 0.7442304041255867, "grad_norm": 1.7731226682662964, "learning_rate": 1.619665722910226e-06, "loss": 0.7914, "step": 21070 }, { "epoch": 0.7442657259292946, "grad_norm": 1.6482027769088745, "learning_rate": 1.619244265109442e-06, "loss": 0.7539, "step": 21071 }, { "epoch": 0.7443010477330025, "grad_norm": 1.6753497123718262, "learning_rate": 1.6188228515553523e-06, "loss": 0.7621, "step": 21072 }, { "epoch": 0.7443363695367103, "grad_norm": 1.6820306777954102, "learning_rate": 1.6184014822534732e-06, "loss": 0.7602, "step": 21073 }, { "epoch": 0.7443716913404183, "grad_norm": 2.15751314163208, "learning_rate": 1.6179801572093212e-06, "loss": 0.7516, "step": 21074 }, { "epoch": 0.7444070131441262, "grad_norm": 1.7185897827148438, "learning_rate": 1.6175588764284084e-06, "loss": 0.768, "step": 21075 }, { "epoch": 0.7444423349478341, "grad_norm": 1.451724886894226, "learning_rate": 1.6171376399162487e-06, "loss": 0.7431, "step": 21076 }, { "epoch": 0.744477656751542, "grad_norm": 1.6543445587158203, "learning_rate": 1.616716447678357e-06, "loss": 0.7774, "step": 21077 }, { "epoch": 0.7445129785552499, "grad_norm": 1.6943583488464355, "learning_rate": 1.616295299720243e-06, "loss": 0.7593, "step": 21078 }, { "epoch": 0.7445483003589578, "grad_norm": 1.7525993585586548, "learning_rate": 1.6158741960474206e-06, "loss": 0.7503, "step": 21079 }, { "epoch": 0.7445836221626657, "grad_norm": 2.1291229724884033, "learning_rate": 1.6154531366654014e-06, "loss": 0.8173, "step": 21080 }, { "epoch": 0.7446189439663736, "grad_norm": 1.8379713296890259, "learning_rate": 1.6150321215796943e-06, "loss": 0.793, "step": 21081 }, { "epoch": 0.7446542657700815, "grad_norm": 1.734394907951355, "learning_rate": 1.6146111507958118e-06, "loss": 0.8059, "step": 21082 }, { "epoch": 0.7446895875737894, "grad_norm": 1.6610013246536255, "learning_rate": 1.614190224319261e-06, "loss": 0.7477, "step": 21083 }, { "epoch": 0.7447249093774974, "grad_norm": 1.7857677936553955, "learning_rate": 1.6137693421555528e-06, "loss": 0.7965, "step": 21084 }, { "epoch": 0.7447602311812053, "grad_norm": 1.8042548894882202, "learning_rate": 1.6133485043101937e-06, "loss": 0.7735, "step": 21085 }, { "epoch": 0.7447955529849132, "grad_norm": 1.7017078399658203, "learning_rate": 1.6129277107886931e-06, "loss": 0.7549, "step": 21086 }, { "epoch": 0.7448308747886211, "grad_norm": 1.683184266090393, "learning_rate": 1.6125069615965593e-06, "loss": 0.8029, "step": 21087 }, { "epoch": 0.744866196592329, "grad_norm": 2.0825705528259277, "learning_rate": 1.6120862567392958e-06, "loss": 0.8004, "step": 21088 }, { "epoch": 0.7449015183960369, "grad_norm": 1.5875163078308105, "learning_rate": 1.6116655962224108e-06, "loss": 0.7531, "step": 21089 }, { "epoch": 0.7449368401997448, "grad_norm": 1.7414997816085815, "learning_rate": 1.6112449800514113e-06, "loss": 0.7825, "step": 21090 }, { "epoch": 0.7449721620034527, "grad_norm": 1.7066651582717896, "learning_rate": 1.6108244082317987e-06, "loss": 0.7814, "step": 21091 }, { "epoch": 0.7450074838071606, "grad_norm": 2.027555465698242, "learning_rate": 1.6104038807690792e-06, "loss": 0.7638, "step": 21092 }, { "epoch": 0.7450428056108686, "grad_norm": 2.0843281745910645, "learning_rate": 1.6099833976687563e-06, "loss": 0.7918, "step": 21093 }, { "epoch": 0.7450781274145765, "grad_norm": 1.9284213781356812, "learning_rate": 1.6095629589363353e-06, "loss": 0.7641, "step": 21094 }, { "epoch": 0.7451134492182844, "grad_norm": 1.8507899045944214, "learning_rate": 1.609142564577315e-06, "loss": 0.7799, "step": 21095 }, { "epoch": 0.7451487710219923, "grad_norm": 1.6350852251052856, "learning_rate": 1.6087222145971998e-06, "loss": 0.7958, "step": 21096 }, { "epoch": 0.7451840928257002, "grad_norm": 1.6731587648391724, "learning_rate": 1.6083019090014922e-06, "loss": 0.7479, "step": 21097 }, { "epoch": 0.7452194146294081, "grad_norm": 1.7974560260772705, "learning_rate": 1.60788164779569e-06, "loss": 0.768, "step": 21098 }, { "epoch": 0.7452547364331159, "grad_norm": 1.7777111530303955, "learning_rate": 1.6074614309852953e-06, "loss": 0.7818, "step": 21099 }, { "epoch": 0.7452900582368238, "grad_norm": 1.6133633852005005, "learning_rate": 1.6070412585758104e-06, "loss": 0.7618, "step": 21100 }, { "epoch": 0.7453253800405317, "grad_norm": 1.8056052923202515, "learning_rate": 1.6066211305727292e-06, "loss": 0.7373, "step": 21101 }, { "epoch": 0.7453607018442396, "grad_norm": 1.7381539344787598, "learning_rate": 1.6062010469815525e-06, "loss": 0.7657, "step": 21102 }, { "epoch": 0.7453960236479475, "grad_norm": 1.7771556377410889, "learning_rate": 1.6057810078077784e-06, "loss": 0.7731, "step": 21103 }, { "epoch": 0.7454313454516555, "grad_norm": 1.562645673751831, "learning_rate": 1.6053610130569058e-06, "loss": 0.7427, "step": 21104 }, { "epoch": 0.7454666672553634, "grad_norm": 1.870186448097229, "learning_rate": 1.6049410627344292e-06, "loss": 0.8074, "step": 21105 }, { "epoch": 0.7455019890590713, "grad_norm": 1.7236385345458984, "learning_rate": 1.6045211568458451e-06, "loss": 0.747, "step": 21106 }, { "epoch": 0.7455373108627792, "grad_norm": 1.7045238018035889, "learning_rate": 1.6041012953966517e-06, "loss": 0.7943, "step": 21107 }, { "epoch": 0.7455726326664871, "grad_norm": 1.8906340599060059, "learning_rate": 1.6036814783923405e-06, "loss": 0.7803, "step": 21108 }, { "epoch": 0.745607954470195, "grad_norm": 1.5850292444229126, "learning_rate": 1.603261705838408e-06, "loss": 0.7778, "step": 21109 }, { "epoch": 0.7456432762739029, "grad_norm": 1.7351611852645874, "learning_rate": 1.6028419777403492e-06, "loss": 0.7554, "step": 21110 }, { "epoch": 0.7456785980776108, "grad_norm": 2.486818552017212, "learning_rate": 1.6024222941036543e-06, "loss": 0.7658, "step": 21111 }, { "epoch": 0.7457139198813187, "grad_norm": 1.9117212295532227, "learning_rate": 1.6020026549338174e-06, "loss": 0.7699, "step": 21112 }, { "epoch": 0.7457492416850267, "grad_norm": 1.7391022443771362, "learning_rate": 1.601583060236333e-06, "loss": 0.7562, "step": 21113 }, { "epoch": 0.7457845634887346, "grad_norm": 1.7760738134384155, "learning_rate": 1.6011635100166895e-06, "loss": 0.8005, "step": 21114 }, { "epoch": 0.7458198852924425, "grad_norm": 1.7328531742095947, "learning_rate": 1.6007440042803784e-06, "loss": 0.7596, "step": 21115 }, { "epoch": 0.7458552070961504, "grad_norm": 1.6397193670272827, "learning_rate": 1.6003245430328923e-06, "loss": 0.7704, "step": 21116 }, { "epoch": 0.7458905288998583, "grad_norm": 1.7470184564590454, "learning_rate": 1.5999051262797183e-06, "loss": 0.7812, "step": 21117 }, { "epoch": 0.7459258507035662, "grad_norm": 1.666915774345398, "learning_rate": 1.5994857540263487e-06, "loss": 0.7659, "step": 21118 }, { "epoch": 0.7459611725072741, "grad_norm": 1.659716010093689, "learning_rate": 1.5990664262782684e-06, "loss": 0.7612, "step": 21119 }, { "epoch": 0.745996494310982, "grad_norm": 1.747987985610962, "learning_rate": 1.598647143040969e-06, "loss": 0.7861, "step": 21120 }, { "epoch": 0.7460318161146899, "grad_norm": 1.9509994983673096, "learning_rate": 1.5982279043199345e-06, "loss": 0.7662, "step": 21121 }, { "epoch": 0.7460671379183978, "grad_norm": 1.740362524986267, "learning_rate": 1.5978087101206541e-06, "loss": 0.7862, "step": 21122 }, { "epoch": 0.7461024597221058, "grad_norm": 1.9372674226760864, "learning_rate": 1.5973895604486155e-06, "loss": 0.769, "step": 21123 }, { "epoch": 0.7461377815258137, "grad_norm": 1.5340919494628906, "learning_rate": 1.5969704553093006e-06, "loss": 0.7757, "step": 21124 }, { "epoch": 0.7461731033295215, "grad_norm": 1.7813867330551147, "learning_rate": 1.5965513947081972e-06, "loss": 0.7871, "step": 21125 }, { "epoch": 0.7462084251332294, "grad_norm": 1.8333089351654053, "learning_rate": 1.5961323786507909e-06, "loss": 0.776, "step": 21126 }, { "epoch": 0.7462437469369373, "grad_norm": 1.6465564966201782, "learning_rate": 1.5957134071425623e-06, "loss": 0.8109, "step": 21127 }, { "epoch": 0.7462790687406452, "grad_norm": 1.7865198850631714, "learning_rate": 1.5952944801889969e-06, "loss": 0.7929, "step": 21128 }, { "epoch": 0.7463143905443531, "grad_norm": 3.2857813835144043, "learning_rate": 1.5948755977955787e-06, "loss": 0.8193, "step": 21129 }, { "epoch": 0.746349712348061, "grad_norm": 1.7600528001785278, "learning_rate": 1.5944567599677873e-06, "loss": 0.8277, "step": 21130 }, { "epoch": 0.7463850341517689, "grad_norm": 1.709740161895752, "learning_rate": 1.5940379667111055e-06, "loss": 0.7373, "step": 21131 }, { "epoch": 0.7464203559554768, "grad_norm": 5.006278991699219, "learning_rate": 1.5936192180310161e-06, "loss": 0.7302, "step": 21132 }, { "epoch": 0.7464556777591848, "grad_norm": 1.8866841793060303, "learning_rate": 1.5932005139329965e-06, "loss": 0.7646, "step": 21133 }, { "epoch": 0.7464909995628927, "grad_norm": 1.6939581632614136, "learning_rate": 1.5927818544225283e-06, "loss": 0.7628, "step": 21134 }, { "epoch": 0.7465263213666006, "grad_norm": 1.6303513050079346, "learning_rate": 1.592363239505092e-06, "loss": 0.7575, "step": 21135 }, { "epoch": 0.7465616431703085, "grad_norm": 1.7317782640457153, "learning_rate": 1.5919446691861633e-06, "loss": 0.7944, "step": 21136 }, { "epoch": 0.7465969649740164, "grad_norm": 1.6340301036834717, "learning_rate": 1.5915261434712243e-06, "loss": 0.7383, "step": 21137 }, { "epoch": 0.7466322867777243, "grad_norm": 1.8384363651275635, "learning_rate": 1.5911076623657484e-06, "loss": 0.7884, "step": 21138 }, { "epoch": 0.7466676085814322, "grad_norm": 1.8798983097076416, "learning_rate": 1.5906892258752165e-06, "loss": 0.782, "step": 21139 }, { "epoch": 0.7467029303851401, "grad_norm": 1.6112619638442993, "learning_rate": 1.5902708340051016e-06, "loss": 0.7734, "step": 21140 }, { "epoch": 0.746738252188848, "grad_norm": 1.7319871187210083, "learning_rate": 1.5898524867608806e-06, "loss": 0.7629, "step": 21141 }, { "epoch": 0.746773573992556, "grad_norm": 1.6416728496551514, "learning_rate": 1.589434184148031e-06, "loss": 0.7703, "step": 21142 }, { "epoch": 0.7468088957962639, "grad_norm": 1.6726787090301514, "learning_rate": 1.589015926172024e-06, "loss": 0.7554, "step": 21143 }, { "epoch": 0.7468442175999718, "grad_norm": 1.6273698806762695, "learning_rate": 1.5885977128383357e-06, "loss": 0.7923, "step": 21144 }, { "epoch": 0.7468795394036797, "grad_norm": 1.6055794954299927, "learning_rate": 1.5881795441524405e-06, "loss": 0.745, "step": 21145 }, { "epoch": 0.7469148612073876, "grad_norm": 1.7672371864318848, "learning_rate": 1.5877614201198089e-06, "loss": 0.781, "step": 21146 }, { "epoch": 0.7469501830110955, "grad_norm": 1.6334489583969116, "learning_rate": 1.5873433407459139e-06, "loss": 0.8153, "step": 21147 }, { "epoch": 0.7469855048148034, "grad_norm": 1.6024092435836792, "learning_rate": 1.5869253060362294e-06, "loss": 0.7502, "step": 21148 }, { "epoch": 0.7470208266185113, "grad_norm": 1.7195568084716797, "learning_rate": 1.5865073159962235e-06, "loss": 0.7676, "step": 21149 }, { "epoch": 0.7470561484222192, "grad_norm": 2.142512798309326, "learning_rate": 1.5860893706313685e-06, "loss": 0.7966, "step": 21150 }, { "epoch": 0.747091470225927, "grad_norm": 1.5970690250396729, "learning_rate": 1.5856714699471337e-06, "loss": 0.7581, "step": 21151 }, { "epoch": 0.747126792029635, "grad_norm": 1.8437312841415405, "learning_rate": 1.5852536139489904e-06, "loss": 0.7636, "step": 21152 }, { "epoch": 0.7471621138333429, "grad_norm": 1.735398292541504, "learning_rate": 1.5848358026424043e-06, "loss": 0.7925, "step": 21153 }, { "epoch": 0.7471974356370508, "grad_norm": 2.1976490020751953, "learning_rate": 1.5844180360328454e-06, "loss": 0.7647, "step": 21154 }, { "epoch": 0.7472327574407587, "grad_norm": 1.8096905946731567, "learning_rate": 1.5840003141257849e-06, "loss": 0.7731, "step": 21155 }, { "epoch": 0.7472680792444666, "grad_norm": 1.5739624500274658, "learning_rate": 1.583582636926682e-06, "loss": 0.7598, "step": 21156 }, { "epoch": 0.7473034010481745, "grad_norm": 1.7905117273330688, "learning_rate": 1.583165004441008e-06, "loss": 0.778, "step": 21157 }, { "epoch": 0.7473387228518824, "grad_norm": 1.7436962127685547, "learning_rate": 1.582747416674229e-06, "loss": 0.7362, "step": 21158 }, { "epoch": 0.7473740446555903, "grad_norm": 2.2418973445892334, "learning_rate": 1.5823298736318081e-06, "loss": 0.7742, "step": 21159 }, { "epoch": 0.7474093664592982, "grad_norm": 1.7818117141723633, "learning_rate": 1.5819123753192117e-06, "loss": 0.7992, "step": 21160 }, { "epoch": 0.7474446882630061, "grad_norm": 1.7855130434036255, "learning_rate": 1.5814949217419034e-06, "loss": 0.7587, "step": 21161 }, { "epoch": 0.747480010066714, "grad_norm": 1.7065449953079224, "learning_rate": 1.581077512905349e-06, "loss": 0.732, "step": 21162 }, { "epoch": 0.747515331870422, "grad_norm": 1.7803882360458374, "learning_rate": 1.5806601488150075e-06, "loss": 0.7572, "step": 21163 }, { "epoch": 0.7475506536741299, "grad_norm": 1.6415201425552368, "learning_rate": 1.5802428294763427e-06, "loss": 0.7646, "step": 21164 }, { "epoch": 0.7475859754778378, "grad_norm": 1.578473448753357, "learning_rate": 1.5798255548948194e-06, "loss": 0.7726, "step": 21165 }, { "epoch": 0.7476212972815457, "grad_norm": 1.590937852859497, "learning_rate": 1.579408325075894e-06, "loss": 0.7832, "step": 21166 }, { "epoch": 0.7476566190852536, "grad_norm": 1.7352441549301147, "learning_rate": 1.5789911400250297e-06, "loss": 0.7631, "step": 21167 }, { "epoch": 0.7476919408889615, "grad_norm": 1.8155735731124878, "learning_rate": 1.5785739997476879e-06, "loss": 0.7778, "step": 21168 }, { "epoch": 0.7477272626926694, "grad_norm": 1.6374529600143433, "learning_rate": 1.578156904249325e-06, "loss": 0.7976, "step": 21169 }, { "epoch": 0.7477625844963773, "grad_norm": 2.2082176208496094, "learning_rate": 1.577739853535401e-06, "loss": 0.7619, "step": 21170 }, { "epoch": 0.7477979063000852, "grad_norm": 1.6577918529510498, "learning_rate": 1.5773228476113762e-06, "loss": 0.7497, "step": 21171 }, { "epoch": 0.7478332281037932, "grad_norm": 1.8104486465454102, "learning_rate": 1.5769058864827053e-06, "loss": 0.7871, "step": 21172 }, { "epoch": 0.7478685499075011, "grad_norm": 1.6464848518371582, "learning_rate": 1.5764889701548457e-06, "loss": 0.7542, "step": 21173 }, { "epoch": 0.747903871711209, "grad_norm": 1.6814498901367188, "learning_rate": 1.5760720986332573e-06, "loss": 0.776, "step": 21174 }, { "epoch": 0.7479391935149169, "grad_norm": 1.6568080186843872, "learning_rate": 1.5756552719233937e-06, "loss": 0.776, "step": 21175 }, { "epoch": 0.7479745153186248, "grad_norm": 1.5828721523284912, "learning_rate": 1.5752384900307084e-06, "loss": 0.7731, "step": 21176 }, { "epoch": 0.7480098371223326, "grad_norm": 1.728128433227539, "learning_rate": 1.574821752960658e-06, "loss": 0.7641, "step": 21177 }, { "epoch": 0.7480451589260405, "grad_norm": 1.6995099782943726, "learning_rate": 1.5744050607186984e-06, "loss": 0.7447, "step": 21178 }, { "epoch": 0.7480804807297484, "grad_norm": 1.6635328531265259, "learning_rate": 1.5739884133102795e-06, "loss": 0.7542, "step": 21179 }, { "epoch": 0.7481158025334563, "grad_norm": 1.5566638708114624, "learning_rate": 1.5735718107408566e-06, "loss": 0.7553, "step": 21180 }, { "epoch": 0.7481511243371642, "grad_norm": 0.9021801352500916, "learning_rate": 1.5731552530158827e-06, "loss": 0.5556, "step": 21181 }, { "epoch": 0.7481864461408722, "grad_norm": 1.5643407106399536, "learning_rate": 1.572738740140808e-06, "loss": 0.7451, "step": 21182 }, { "epoch": 0.7482217679445801, "grad_norm": 1.718422293663025, "learning_rate": 1.5723222721210834e-06, "loss": 0.7651, "step": 21183 }, { "epoch": 0.748257089748288, "grad_norm": 1.5758335590362549, "learning_rate": 1.5719058489621625e-06, "loss": 0.7485, "step": 21184 }, { "epoch": 0.7482924115519959, "grad_norm": 1.5991144180297852, "learning_rate": 1.5714894706694921e-06, "loss": 0.7405, "step": 21185 }, { "epoch": 0.7483277333557038, "grad_norm": 2.0793139934539795, "learning_rate": 1.5710731372485232e-06, "loss": 0.7865, "step": 21186 }, { "epoch": 0.7483630551594117, "grad_norm": 1.727229356765747, "learning_rate": 1.5706568487047058e-06, "loss": 0.7618, "step": 21187 }, { "epoch": 0.7483983769631196, "grad_norm": 3.9313151836395264, "learning_rate": 1.5702406050434854e-06, "loss": 0.7568, "step": 21188 }, { "epoch": 0.7484336987668275, "grad_norm": 1.5680805444717407, "learning_rate": 1.569824406270311e-06, "loss": 0.7449, "step": 21189 }, { "epoch": 0.7484690205705354, "grad_norm": 2.472499370574951, "learning_rate": 1.5694082523906317e-06, "loss": 0.7849, "step": 21190 }, { "epoch": 0.7485043423742433, "grad_norm": 1.719636082649231, "learning_rate": 1.568992143409891e-06, "loss": 0.7563, "step": 21191 }, { "epoch": 0.7485396641779513, "grad_norm": 1.7481666803359985, "learning_rate": 1.5685760793335375e-06, "loss": 0.7311, "step": 21192 }, { "epoch": 0.7485749859816592, "grad_norm": 1.5994683504104614, "learning_rate": 1.5681600601670133e-06, "loss": 0.7363, "step": 21193 }, { "epoch": 0.7486103077853671, "grad_norm": 1.610908031463623, "learning_rate": 1.5677440859157671e-06, "loss": 0.7412, "step": 21194 }, { "epoch": 0.748645629589075, "grad_norm": 1.5312557220458984, "learning_rate": 1.5673281565852395e-06, "loss": 0.7551, "step": 21195 }, { "epoch": 0.7486809513927829, "grad_norm": 1.7156356573104858, "learning_rate": 1.566912272180876e-06, "loss": 0.8109, "step": 21196 }, { "epoch": 0.7487162731964908, "grad_norm": 1.8309385776519775, "learning_rate": 1.5664964327081205e-06, "loss": 0.7813, "step": 21197 }, { "epoch": 0.7487515950001987, "grad_norm": 1.9562327861785889, "learning_rate": 1.5660806381724125e-06, "loss": 0.7684, "step": 21198 }, { "epoch": 0.7487869168039066, "grad_norm": 1.6921494007110596, "learning_rate": 1.5656648885791958e-06, "loss": 0.7409, "step": 21199 }, { "epoch": 0.7488222386076145, "grad_norm": 1.8611449003219604, "learning_rate": 1.5652491839339134e-06, "loss": 0.779, "step": 21200 }, { "epoch": 0.7488575604113225, "grad_norm": 1.6189559698104858, "learning_rate": 1.564833524242002e-06, "loss": 0.7525, "step": 21201 }, { "epoch": 0.7488928822150304, "grad_norm": 2.987288475036621, "learning_rate": 1.564417909508904e-06, "loss": 0.7557, "step": 21202 }, { "epoch": 0.7489282040187382, "grad_norm": 1.511210560798645, "learning_rate": 1.56400233974006e-06, "loss": 0.7796, "step": 21203 }, { "epoch": 0.7489635258224461, "grad_norm": 1.8490337133407593, "learning_rate": 1.5635868149409062e-06, "loss": 0.7794, "step": 21204 }, { "epoch": 0.748998847626154, "grad_norm": 1.657239556312561, "learning_rate": 1.563171335116882e-06, "loss": 0.818, "step": 21205 }, { "epoch": 0.7490341694298619, "grad_norm": 1.627565860748291, "learning_rate": 1.5627559002734267e-06, "loss": 0.7907, "step": 21206 }, { "epoch": 0.7490694912335698, "grad_norm": 1.8585526943206787, "learning_rate": 1.5623405104159745e-06, "loss": 0.7864, "step": 21207 }, { "epoch": 0.7491048130372777, "grad_norm": 1.5219309329986572, "learning_rate": 1.5619251655499634e-06, "loss": 0.7558, "step": 21208 }, { "epoch": 0.7491401348409856, "grad_norm": 1.6143566370010376, "learning_rate": 1.5615098656808297e-06, "loss": 0.7541, "step": 21209 }, { "epoch": 0.7491754566446935, "grad_norm": 3.6721036434173584, "learning_rate": 1.5610946108140102e-06, "loss": 0.7943, "step": 21210 }, { "epoch": 0.7492107784484014, "grad_norm": 1.7645771503448486, "learning_rate": 1.5606794009549375e-06, "loss": 0.7871, "step": 21211 }, { "epoch": 0.7492461002521094, "grad_norm": 1.7198963165283203, "learning_rate": 1.5602642361090453e-06, "loss": 0.7776, "step": 21212 }, { "epoch": 0.7492814220558173, "grad_norm": 1.7180696725845337, "learning_rate": 1.559849116281769e-06, "loss": 0.782, "step": 21213 }, { "epoch": 0.7493167438595252, "grad_norm": 2.033874273300171, "learning_rate": 1.5594340414785403e-06, "loss": 0.7574, "step": 21214 }, { "epoch": 0.7493520656632331, "grad_norm": 1.6026891469955444, "learning_rate": 1.559019011704791e-06, "loss": 0.765, "step": 21215 }, { "epoch": 0.749387387466941, "grad_norm": 1.7072068452835083, "learning_rate": 1.5586040269659564e-06, "loss": 0.7585, "step": 21216 }, { "epoch": 0.7494227092706489, "grad_norm": 1.7918617725372314, "learning_rate": 1.5581890872674632e-06, "loss": 0.7639, "step": 21217 }, { "epoch": 0.7494580310743568, "grad_norm": 1.7763845920562744, "learning_rate": 1.5577741926147449e-06, "loss": 0.7689, "step": 21218 }, { "epoch": 0.7494933528780647, "grad_norm": 1.728646159172058, "learning_rate": 1.5573593430132306e-06, "loss": 0.8105, "step": 21219 }, { "epoch": 0.7495286746817726, "grad_norm": 1.540248155593872, "learning_rate": 1.5569445384683513e-06, "loss": 0.7472, "step": 21220 }, { "epoch": 0.7495639964854806, "grad_norm": 1.8212422132492065, "learning_rate": 1.5565297789855333e-06, "loss": 0.7312, "step": 21221 }, { "epoch": 0.7495993182891885, "grad_norm": 1.6500896215438843, "learning_rate": 1.5561150645702067e-06, "loss": 0.7692, "step": 21222 }, { "epoch": 0.7496346400928964, "grad_norm": 0.8605778813362122, "learning_rate": 1.5557003952277998e-06, "loss": 0.5804, "step": 21223 }, { "epoch": 0.7496699618966043, "grad_norm": 1.541304111480713, "learning_rate": 1.555285770963737e-06, "loss": 0.7771, "step": 21224 }, { "epoch": 0.7497052837003122, "grad_norm": 1.752305507659912, "learning_rate": 1.554871191783447e-06, "loss": 0.7588, "step": 21225 }, { "epoch": 0.7497406055040201, "grad_norm": 1.6937847137451172, "learning_rate": 1.5544566576923564e-06, "loss": 0.7608, "step": 21226 }, { "epoch": 0.749775927307728, "grad_norm": 2.4386000633239746, "learning_rate": 1.554042168695888e-06, "loss": 0.7762, "step": 21227 }, { "epoch": 0.7498112491114359, "grad_norm": 1.882750391960144, "learning_rate": 1.553627724799468e-06, "loss": 0.779, "step": 21228 }, { "epoch": 0.7498465709151437, "grad_norm": 1.754467248916626, "learning_rate": 1.5532133260085219e-06, "loss": 0.7595, "step": 21229 }, { "epoch": 0.7498818927188516, "grad_norm": 1.7166633605957031, "learning_rate": 1.552798972328472e-06, "loss": 0.7895, "step": 21230 }, { "epoch": 0.7499172145225595, "grad_norm": 1.6502528190612793, "learning_rate": 1.5523846637647394e-06, "loss": 0.7692, "step": 21231 }, { "epoch": 0.7499525363262675, "grad_norm": 1.7895662784576416, "learning_rate": 1.551970400322748e-06, "loss": 0.7627, "step": 21232 }, { "epoch": 0.7499878581299754, "grad_norm": 1.7108426094055176, "learning_rate": 1.5515561820079216e-06, "loss": 0.7485, "step": 21233 }, { "epoch": 0.7500231799336833, "grad_norm": 1.6164801120758057, "learning_rate": 1.5511420088256784e-06, "loss": 0.7294, "step": 21234 }, { "epoch": 0.7500585017373912, "grad_norm": 1.600751519203186, "learning_rate": 1.5507278807814401e-06, "loss": 0.816, "step": 21235 }, { "epoch": 0.7500938235410991, "grad_norm": 1.5554906129837036, "learning_rate": 1.5503137978806288e-06, "loss": 0.7399, "step": 21236 }, { "epoch": 0.750129145344807, "grad_norm": 1.7562952041625977, "learning_rate": 1.5498997601286598e-06, "loss": 0.7906, "step": 21237 }, { "epoch": 0.7501644671485149, "grad_norm": 1.813659429550171, "learning_rate": 1.5494857675309544e-06, "loss": 0.7621, "step": 21238 }, { "epoch": 0.7501997889522228, "grad_norm": 1.5961458683013916, "learning_rate": 1.5490718200929322e-06, "loss": 0.7535, "step": 21239 }, { "epoch": 0.7502351107559307, "grad_norm": 1.7547498941421509, "learning_rate": 1.5486579178200084e-06, "loss": 0.7828, "step": 21240 }, { "epoch": 0.7502704325596387, "grad_norm": 1.8834917545318604, "learning_rate": 1.5482440607176002e-06, "loss": 0.7823, "step": 21241 }, { "epoch": 0.7503057543633466, "grad_norm": 1.7342119216918945, "learning_rate": 1.5478302487911267e-06, "loss": 0.7787, "step": 21242 }, { "epoch": 0.7503410761670545, "grad_norm": 10.745753288269043, "learning_rate": 1.5474164820460003e-06, "loss": 0.7398, "step": 21243 }, { "epoch": 0.7503763979707624, "grad_norm": 1.5446161031723022, "learning_rate": 1.5470027604876382e-06, "loss": 0.7682, "step": 21244 }, { "epoch": 0.7504117197744703, "grad_norm": 1.662121057510376, "learning_rate": 1.5465890841214564e-06, "loss": 0.792, "step": 21245 }, { "epoch": 0.7504470415781782, "grad_norm": 1.6804717779159546, "learning_rate": 1.5461754529528655e-06, "loss": 0.7721, "step": 21246 }, { "epoch": 0.7504823633818861, "grad_norm": 1.7318689823150635, "learning_rate": 1.5457618669872815e-06, "loss": 0.75, "step": 21247 }, { "epoch": 0.750517685185594, "grad_norm": 1.8469290733337402, "learning_rate": 1.5453483262301183e-06, "loss": 0.7441, "step": 21248 }, { "epoch": 0.7505530069893019, "grad_norm": 1.6754692792892456, "learning_rate": 1.5449348306867874e-06, "loss": 0.7609, "step": 21249 }, { "epoch": 0.7505883287930099, "grad_norm": 0.8931063413619995, "learning_rate": 1.5445213803626974e-06, "loss": 0.5751, "step": 21250 }, { "epoch": 0.7506236505967178, "grad_norm": 1.7281945943832397, "learning_rate": 1.5441079752632627e-06, "loss": 0.7704, "step": 21251 }, { "epoch": 0.7506589724004257, "grad_norm": 1.7675905227661133, "learning_rate": 1.5436946153938942e-06, "loss": 0.7892, "step": 21252 }, { "epoch": 0.7506942942041336, "grad_norm": 1.7110220193862915, "learning_rate": 1.5432813007599995e-06, "loss": 0.7664, "step": 21253 }, { "epoch": 0.7507296160078415, "grad_norm": 1.6987789869308472, "learning_rate": 1.5428680313669891e-06, "loss": 0.7816, "step": 21254 }, { "epoch": 0.7507649378115493, "grad_norm": 1.6298942565917969, "learning_rate": 1.5424548072202744e-06, "loss": 0.7565, "step": 21255 }, { "epoch": 0.7508002596152572, "grad_norm": 1.7502057552337646, "learning_rate": 1.5420416283252592e-06, "loss": 0.8078, "step": 21256 }, { "epoch": 0.7508355814189651, "grad_norm": 1.831845760345459, "learning_rate": 1.5416284946873528e-06, "loss": 0.7458, "step": 21257 }, { "epoch": 0.750870903222673, "grad_norm": 1.7494785785675049, "learning_rate": 1.5412154063119645e-06, "loss": 0.7588, "step": 21258 }, { "epoch": 0.7509062250263809, "grad_norm": 1.704695224761963, "learning_rate": 1.5408023632044972e-06, "loss": 0.7891, "step": 21259 }, { "epoch": 0.7509415468300888, "grad_norm": 1.762916088104248, "learning_rate": 1.5403893653703583e-06, "loss": 0.7876, "step": 21260 }, { "epoch": 0.7509768686337968, "grad_norm": 1.722525954246521, "learning_rate": 1.5399764128149546e-06, "loss": 0.7884, "step": 21261 }, { "epoch": 0.7510121904375047, "grad_norm": 1.6009676456451416, "learning_rate": 1.5395635055436875e-06, "loss": 0.752, "step": 21262 }, { "epoch": 0.7510475122412126, "grad_norm": 2.0558974742889404, "learning_rate": 1.539150643561963e-06, "loss": 0.7836, "step": 21263 }, { "epoch": 0.7510828340449205, "grad_norm": 1.5599064826965332, "learning_rate": 1.5387378268751858e-06, "loss": 0.7444, "step": 21264 }, { "epoch": 0.7511181558486284, "grad_norm": 1.9494842290878296, "learning_rate": 1.5383250554887557e-06, "loss": 0.8216, "step": 21265 }, { "epoch": 0.7511534776523363, "grad_norm": 2.264101266860962, "learning_rate": 1.5379123294080777e-06, "loss": 0.794, "step": 21266 }, { "epoch": 0.7511887994560442, "grad_norm": 1.7827117443084717, "learning_rate": 1.5374996486385513e-06, "loss": 0.7579, "step": 21267 }, { "epoch": 0.7512241212597521, "grad_norm": 1.6427083015441895, "learning_rate": 1.5370870131855803e-06, "loss": 0.7668, "step": 21268 }, { "epoch": 0.75125944306346, "grad_norm": 1.8233180046081543, "learning_rate": 1.5366744230545615e-06, "loss": 0.752, "step": 21269 }, { "epoch": 0.751294764867168, "grad_norm": 1.7192426919937134, "learning_rate": 1.536261878250897e-06, "loss": 0.7549, "step": 21270 }, { "epoch": 0.7513300866708759, "grad_norm": 1.8147118091583252, "learning_rate": 1.5358493787799873e-06, "loss": 0.7652, "step": 21271 }, { "epoch": 0.7513654084745838, "grad_norm": 1.594506859779358, "learning_rate": 1.5354369246472283e-06, "loss": 0.7754, "step": 21272 }, { "epoch": 0.7514007302782917, "grad_norm": 1.603705644607544, "learning_rate": 1.5350245158580196e-06, "loss": 0.7592, "step": 21273 }, { "epoch": 0.7514360520819996, "grad_norm": 2.8029136657714844, "learning_rate": 1.5346121524177604e-06, "loss": 0.7776, "step": 21274 }, { "epoch": 0.7514713738857075, "grad_norm": 1.7122931480407715, "learning_rate": 1.5341998343318443e-06, "loss": 0.8051, "step": 21275 }, { "epoch": 0.7515066956894154, "grad_norm": 1.842115044593811, "learning_rate": 1.5337875616056691e-06, "loss": 0.764, "step": 21276 }, { "epoch": 0.7515420174931233, "grad_norm": 1.7004178762435913, "learning_rate": 1.5333753342446306e-06, "loss": 0.7909, "step": 21277 }, { "epoch": 0.7515773392968312, "grad_norm": 1.716441035270691, "learning_rate": 1.5329631522541261e-06, "loss": 0.8002, "step": 21278 }, { "epoch": 0.7516126611005391, "grad_norm": 1.6936240196228027, "learning_rate": 1.5325510156395463e-06, "loss": 0.7819, "step": 21279 }, { "epoch": 0.7516479829042471, "grad_norm": 1.9595091342926025, "learning_rate": 1.5321389244062868e-06, "loss": 0.7973, "step": 21280 }, { "epoch": 0.7516833047079549, "grad_norm": 1.6431918144226074, "learning_rate": 1.5317268785597433e-06, "loss": 0.758, "step": 21281 }, { "epoch": 0.7517186265116628, "grad_norm": 1.6271740198135376, "learning_rate": 1.5313148781053044e-06, "loss": 0.7789, "step": 21282 }, { "epoch": 0.7517539483153707, "grad_norm": 3.4461302757263184, "learning_rate": 1.5309029230483647e-06, "loss": 0.7605, "step": 21283 }, { "epoch": 0.7517892701190786, "grad_norm": 1.649193286895752, "learning_rate": 1.5304910133943162e-06, "loss": 0.8118, "step": 21284 }, { "epoch": 0.7518245919227865, "grad_norm": 1.7017018795013428, "learning_rate": 1.5300791491485501e-06, "loss": 0.7544, "step": 21285 }, { "epoch": 0.7518599137264944, "grad_norm": 1.6931734085083008, "learning_rate": 1.5296673303164539e-06, "loss": 0.7616, "step": 21286 }, { "epoch": 0.7518952355302023, "grad_norm": 1.6750508546829224, "learning_rate": 1.529255556903419e-06, "loss": 0.7676, "step": 21287 }, { "epoch": 0.7519305573339102, "grad_norm": 1.6130414009094238, "learning_rate": 1.5288438289148367e-06, "loss": 0.7378, "step": 21288 }, { "epoch": 0.7519658791376181, "grad_norm": 1.6464413404464722, "learning_rate": 1.5284321463560924e-06, "loss": 0.7635, "step": 21289 }, { "epoch": 0.752001200941326, "grad_norm": 1.870185375213623, "learning_rate": 1.5280205092325755e-06, "loss": 0.8103, "step": 21290 }, { "epoch": 0.752036522745034, "grad_norm": 1.787269949913025, "learning_rate": 1.5276089175496755e-06, "loss": 0.7979, "step": 21291 }, { "epoch": 0.7520718445487419, "grad_norm": 1.8761576414108276, "learning_rate": 1.5271973713127752e-06, "loss": 0.8041, "step": 21292 }, { "epoch": 0.7521071663524498, "grad_norm": 1.8513821363449097, "learning_rate": 1.5267858705272631e-06, "loss": 0.7555, "step": 21293 }, { "epoch": 0.7521424881561577, "grad_norm": 1.6516340970993042, "learning_rate": 1.526374415198526e-06, "loss": 0.7529, "step": 21294 }, { "epoch": 0.7521778099598656, "grad_norm": 1.8174232244491577, "learning_rate": 1.5259630053319463e-06, "loss": 0.7785, "step": 21295 }, { "epoch": 0.7522131317635735, "grad_norm": 1.8852894306182861, "learning_rate": 1.5255516409329091e-06, "loss": 0.783, "step": 21296 }, { "epoch": 0.7522484535672814, "grad_norm": 1.9610099792480469, "learning_rate": 1.5251403220068012e-06, "loss": 0.7589, "step": 21297 }, { "epoch": 0.7522837753709893, "grad_norm": 1.7898823022842407, "learning_rate": 1.5247290485590016e-06, "loss": 0.7648, "step": 21298 }, { "epoch": 0.7523190971746972, "grad_norm": 1.5558968782424927, "learning_rate": 1.524317820594895e-06, "loss": 0.7207, "step": 21299 }, { "epoch": 0.7523544189784052, "grad_norm": 1.8267197608947754, "learning_rate": 1.5239066381198652e-06, "loss": 0.7747, "step": 21300 }, { "epoch": 0.7523897407821131, "grad_norm": 1.8767420053482056, "learning_rate": 1.52349550113929e-06, "loss": 0.7518, "step": 21301 }, { "epoch": 0.752425062585821, "grad_norm": 1.633347988128662, "learning_rate": 1.5230844096585524e-06, "loss": 0.7761, "step": 21302 }, { "epoch": 0.7524603843895289, "grad_norm": 1.765097737312317, "learning_rate": 1.5226733636830337e-06, "loss": 0.7448, "step": 21303 }, { "epoch": 0.7524957061932368, "grad_norm": 1.6734477281570435, "learning_rate": 1.5222623632181126e-06, "loss": 0.7896, "step": 21304 }, { "epoch": 0.7525310279969447, "grad_norm": 1.7781274318695068, "learning_rate": 1.5218514082691666e-06, "loss": 0.7646, "step": 21305 }, { "epoch": 0.7525663498006526, "grad_norm": 1.7943053245544434, "learning_rate": 1.5214404988415754e-06, "loss": 0.7496, "step": 21306 }, { "epoch": 0.7526016716043604, "grad_norm": 1.781368613243103, "learning_rate": 1.5210296349407182e-06, "loss": 0.7933, "step": 21307 }, { "epoch": 0.7526369934080683, "grad_norm": 1.6555787324905396, "learning_rate": 1.5206188165719704e-06, "loss": 0.7829, "step": 21308 }, { "epoch": 0.7526723152117762, "grad_norm": 1.6189477443695068, "learning_rate": 1.520208043740709e-06, "loss": 0.7503, "step": 21309 }, { "epoch": 0.7527076370154842, "grad_norm": 1.727073073387146, "learning_rate": 1.5197973164523123e-06, "loss": 0.7599, "step": 21310 }, { "epoch": 0.7527429588191921, "grad_norm": 1.6426314115524292, "learning_rate": 1.5193866347121523e-06, "loss": 0.7645, "step": 21311 }, { "epoch": 0.7527782806229, "grad_norm": 1.6988927125930786, "learning_rate": 1.518975998525606e-06, "loss": 0.7917, "step": 21312 }, { "epoch": 0.7528136024266079, "grad_norm": 1.615043044090271, "learning_rate": 1.5185654078980488e-06, "loss": 0.7833, "step": 21313 }, { "epoch": 0.7528489242303158, "grad_norm": 1.8636926412582397, "learning_rate": 1.5181548628348514e-06, "loss": 0.7625, "step": 21314 }, { "epoch": 0.7528842460340237, "grad_norm": 1.582571029663086, "learning_rate": 1.517744363341389e-06, "loss": 0.8041, "step": 21315 }, { "epoch": 0.7529195678377316, "grad_norm": 1.6515496969223022, "learning_rate": 1.5173339094230355e-06, "loss": 0.7647, "step": 21316 }, { "epoch": 0.7529548896414395, "grad_norm": 2.211730480194092, "learning_rate": 1.516923501085159e-06, "loss": 0.7874, "step": 21317 }, { "epoch": 0.7529902114451474, "grad_norm": 1.826037049293518, "learning_rate": 1.5165131383331332e-06, "loss": 0.8036, "step": 21318 }, { "epoch": 0.7530255332488553, "grad_norm": 1.7157325744628906, "learning_rate": 1.5161028211723305e-06, "loss": 0.7756, "step": 21319 }, { "epoch": 0.7530608550525633, "grad_norm": 1.6839070320129395, "learning_rate": 1.515692549608117e-06, "loss": 0.7755, "step": 21320 }, { "epoch": 0.7530961768562712, "grad_norm": 1.6414194107055664, "learning_rate": 1.5152823236458652e-06, "loss": 0.7538, "step": 21321 }, { "epoch": 0.7531314986599791, "grad_norm": 1.9072915315628052, "learning_rate": 1.5148721432909441e-06, "loss": 0.7845, "step": 21322 }, { "epoch": 0.753166820463687, "grad_norm": 1.8204286098480225, "learning_rate": 1.5144620085487216e-06, "loss": 0.7444, "step": 21323 }, { "epoch": 0.7532021422673949, "grad_norm": 1.6135567426681519, "learning_rate": 1.514051919424564e-06, "loss": 0.7433, "step": 21324 }, { "epoch": 0.7532374640711028, "grad_norm": 1.760237216949463, "learning_rate": 1.5136418759238387e-06, "loss": 0.7946, "step": 21325 }, { "epoch": 0.7532727858748107, "grad_norm": 0.9324070811271667, "learning_rate": 1.513231878051915e-06, "loss": 0.5779, "step": 21326 }, { "epoch": 0.7533081076785186, "grad_norm": 1.6999431848526, "learning_rate": 1.5128219258141557e-06, "loss": 0.7645, "step": 21327 }, { "epoch": 0.7533434294822265, "grad_norm": 1.7899266481399536, "learning_rate": 1.5124120192159275e-06, "loss": 0.7545, "step": 21328 }, { "epoch": 0.7533787512859345, "grad_norm": 1.5527572631835938, "learning_rate": 1.512002158262597e-06, "loss": 0.7369, "step": 21329 }, { "epoch": 0.7534140730896424, "grad_norm": 2.008288860321045, "learning_rate": 1.5115923429595247e-06, "loss": 0.7237, "step": 21330 }, { "epoch": 0.7534493948933503, "grad_norm": 1.5802892446517944, "learning_rate": 1.511182573312076e-06, "loss": 0.7601, "step": 21331 }, { "epoch": 0.7534847166970582, "grad_norm": 1.6494121551513672, "learning_rate": 1.5107728493256156e-06, "loss": 0.7661, "step": 21332 }, { "epoch": 0.753520038500766, "grad_norm": 1.6815378665924072, "learning_rate": 1.5103631710055029e-06, "loss": 0.7574, "step": 21333 }, { "epoch": 0.7535553603044739, "grad_norm": 1.874338150024414, "learning_rate": 1.5099535383571006e-06, "loss": 0.7422, "step": 21334 }, { "epoch": 0.7535906821081818, "grad_norm": 1.6472920179367065, "learning_rate": 1.5095439513857708e-06, "loss": 0.751, "step": 21335 }, { "epoch": 0.7536260039118897, "grad_norm": 1.5934545993804932, "learning_rate": 1.5091344100968751e-06, "loss": 0.7614, "step": 21336 }, { "epoch": 0.7536613257155976, "grad_norm": 1.894415020942688, "learning_rate": 1.5087249144957705e-06, "loss": 0.7761, "step": 21337 }, { "epoch": 0.7536966475193055, "grad_norm": 1.9011324644088745, "learning_rate": 1.5083154645878174e-06, "loss": 0.7656, "step": 21338 }, { "epoch": 0.7537319693230135, "grad_norm": 1.5335801839828491, "learning_rate": 1.5079060603783774e-06, "loss": 0.7882, "step": 21339 }, { "epoch": 0.7537672911267214, "grad_norm": 2.143404006958008, "learning_rate": 1.507496701872806e-06, "loss": 0.7646, "step": 21340 }, { "epoch": 0.7538026129304293, "grad_norm": 1.7907779216766357, "learning_rate": 1.5070873890764598e-06, "loss": 0.7641, "step": 21341 }, { "epoch": 0.7538379347341372, "grad_norm": 1.7303383350372314, "learning_rate": 1.5066781219946986e-06, "loss": 0.7293, "step": 21342 }, { "epoch": 0.7538732565378451, "grad_norm": 1.6304913759231567, "learning_rate": 1.5062689006328763e-06, "loss": 0.7889, "step": 21343 }, { "epoch": 0.753908578341553, "grad_norm": 1.7622507810592651, "learning_rate": 1.5058597249963497e-06, "loss": 0.7757, "step": 21344 }, { "epoch": 0.7539439001452609, "grad_norm": 1.817484736442566, "learning_rate": 1.505450595090474e-06, "loss": 0.7811, "step": 21345 }, { "epoch": 0.7539792219489688, "grad_norm": 1.755515217781067, "learning_rate": 1.5050415109206062e-06, "loss": 0.7597, "step": 21346 }, { "epoch": 0.7540145437526767, "grad_norm": 1.754730224609375, "learning_rate": 1.5046324724920957e-06, "loss": 0.7745, "step": 21347 }, { "epoch": 0.7540498655563846, "grad_norm": 1.662011742591858, "learning_rate": 1.504223479810299e-06, "loss": 0.7724, "step": 21348 }, { "epoch": 0.7540851873600926, "grad_norm": 1.6381524801254272, "learning_rate": 1.5038145328805698e-06, "loss": 0.7677, "step": 21349 }, { "epoch": 0.7541205091638005, "grad_norm": 1.6306087970733643, "learning_rate": 1.5034056317082568e-06, "loss": 0.7975, "step": 21350 }, { "epoch": 0.7541558309675084, "grad_norm": 1.6650176048278809, "learning_rate": 1.5029967762987136e-06, "loss": 0.7382, "step": 21351 }, { "epoch": 0.7541911527712163, "grad_norm": 1.512974739074707, "learning_rate": 1.5025879666572935e-06, "loss": 0.7271, "step": 21352 }, { "epoch": 0.7542264745749242, "grad_norm": 1.8706810474395752, "learning_rate": 1.5021792027893423e-06, "loss": 0.7323, "step": 21353 }, { "epoch": 0.7542617963786321, "grad_norm": 1.8447073698043823, "learning_rate": 1.5017704847002123e-06, "loss": 0.7663, "step": 21354 }, { "epoch": 0.75429711818234, "grad_norm": 1.5854463577270508, "learning_rate": 1.5013618123952545e-06, "loss": 0.7639, "step": 21355 }, { "epoch": 0.7543324399860479, "grad_norm": 1.7314836978912354, "learning_rate": 1.5009531858798138e-06, "loss": 0.7621, "step": 21356 }, { "epoch": 0.7543677617897558, "grad_norm": 1.8205938339233398, "learning_rate": 1.5005446051592405e-06, "loss": 0.7733, "step": 21357 }, { "epoch": 0.7544030835934638, "grad_norm": 1.7183899879455566, "learning_rate": 1.5001360702388824e-06, "loss": 0.7745, "step": 21358 }, { "epoch": 0.7544384053971716, "grad_norm": 1.5745841264724731, "learning_rate": 1.4997275811240863e-06, "loss": 0.7556, "step": 21359 }, { "epoch": 0.7544737272008795, "grad_norm": 1.6604716777801514, "learning_rate": 1.4993191378201954e-06, "loss": 0.7588, "step": 21360 }, { "epoch": 0.7545090490045874, "grad_norm": 1.5452532768249512, "learning_rate": 1.4989107403325582e-06, "loss": 0.751, "step": 21361 }, { "epoch": 0.7545443708082953, "grad_norm": 0.8587055206298828, "learning_rate": 1.4985023886665202e-06, "loss": 0.5742, "step": 21362 }, { "epoch": 0.7545796926120032, "grad_norm": 1.7118250131607056, "learning_rate": 1.498094082827423e-06, "loss": 0.7993, "step": 21363 }, { "epoch": 0.7546150144157111, "grad_norm": 1.998253583908081, "learning_rate": 1.4976858228206126e-06, "loss": 0.7782, "step": 21364 }, { "epoch": 0.754650336219419, "grad_norm": 1.6158690452575684, "learning_rate": 1.4972776086514328e-06, "loss": 0.7748, "step": 21365 }, { "epoch": 0.7546856580231269, "grad_norm": 1.8698968887329102, "learning_rate": 1.4968694403252244e-06, "loss": 0.7765, "step": 21366 }, { "epoch": 0.7547209798268348, "grad_norm": 1.5867033004760742, "learning_rate": 1.4964613178473298e-06, "loss": 0.725, "step": 21367 }, { "epoch": 0.7547563016305427, "grad_norm": 1.645983099937439, "learning_rate": 1.4960532412230927e-06, "loss": 0.7562, "step": 21368 }, { "epoch": 0.7547916234342507, "grad_norm": 1.765257716178894, "learning_rate": 1.4956452104578501e-06, "loss": 0.7838, "step": 21369 }, { "epoch": 0.7548269452379586, "grad_norm": 1.8534111976623535, "learning_rate": 1.4952372255569446e-06, "loss": 0.7509, "step": 21370 }, { "epoch": 0.7548622670416665, "grad_norm": 1.6135790348052979, "learning_rate": 1.494829286525717e-06, "loss": 0.7695, "step": 21371 }, { "epoch": 0.7548975888453744, "grad_norm": 1.6037043333053589, "learning_rate": 1.4944213933695033e-06, "loss": 0.7761, "step": 21372 }, { "epoch": 0.7549329106490823, "grad_norm": 1.7034276723861694, "learning_rate": 1.4940135460936433e-06, "loss": 0.7791, "step": 21373 }, { "epoch": 0.7549682324527902, "grad_norm": 1.5574284791946411, "learning_rate": 1.4936057447034764e-06, "loss": 0.7411, "step": 21374 }, { "epoch": 0.7550035542564981, "grad_norm": 1.572980523109436, "learning_rate": 1.493197989204337e-06, "loss": 0.7585, "step": 21375 }, { "epoch": 0.755038876060206, "grad_norm": 0.921442985534668, "learning_rate": 1.4927902796015631e-06, "loss": 0.5651, "step": 21376 }, { "epoch": 0.7550741978639139, "grad_norm": 1.6284960508346558, "learning_rate": 1.4923826159004922e-06, "loss": 0.7184, "step": 21377 }, { "epoch": 0.7551095196676219, "grad_norm": 0.9089444875717163, "learning_rate": 1.4919749981064585e-06, "loss": 0.5617, "step": 21378 }, { "epoch": 0.7551448414713298, "grad_norm": 1.6006804704666138, "learning_rate": 1.4915674262247948e-06, "loss": 0.7381, "step": 21379 }, { "epoch": 0.7551801632750377, "grad_norm": 1.7219151258468628, "learning_rate": 1.4911599002608373e-06, "loss": 0.7457, "step": 21380 }, { "epoch": 0.7552154850787456, "grad_norm": 1.682191252708435, "learning_rate": 1.4907524202199209e-06, "loss": 0.7472, "step": 21381 }, { "epoch": 0.7552508068824535, "grad_norm": 2.1122682094573975, "learning_rate": 1.4903449861073755e-06, "loss": 0.778, "step": 21382 }, { "epoch": 0.7552861286861614, "grad_norm": 1.8295844793319702, "learning_rate": 1.4899375979285351e-06, "loss": 0.7669, "step": 21383 }, { "epoch": 0.7553214504898693, "grad_norm": 1.6658128499984741, "learning_rate": 1.4895302556887331e-06, "loss": 0.7936, "step": 21384 }, { "epoch": 0.7553567722935771, "grad_norm": 1.8451226949691772, "learning_rate": 1.4891229593932982e-06, "loss": 0.7562, "step": 21385 }, { "epoch": 0.755392094097285, "grad_norm": 1.852702260017395, "learning_rate": 1.4887157090475613e-06, "loss": 0.7623, "step": 21386 }, { "epoch": 0.7554274159009929, "grad_norm": 1.6351239681243896, "learning_rate": 1.4883085046568551e-06, "loss": 0.7473, "step": 21387 }, { "epoch": 0.7554627377047008, "grad_norm": 2.1891109943389893, "learning_rate": 1.4879013462265057e-06, "loss": 0.7646, "step": 21388 }, { "epoch": 0.7554980595084088, "grad_norm": 2.0519332885742188, "learning_rate": 1.4874942337618431e-06, "loss": 0.8005, "step": 21389 }, { "epoch": 0.7555333813121167, "grad_norm": 1.655036211013794, "learning_rate": 1.4870871672681974e-06, "loss": 0.7837, "step": 21390 }, { "epoch": 0.7555687031158246, "grad_norm": 1.7015979290008545, "learning_rate": 1.4866801467508922e-06, "loss": 0.7174, "step": 21391 }, { "epoch": 0.7556040249195325, "grad_norm": 1.6952224969863892, "learning_rate": 1.486273172215258e-06, "loss": 0.7805, "step": 21392 }, { "epoch": 0.7556393467232404, "grad_norm": 1.5769636631011963, "learning_rate": 1.4858662436666193e-06, "loss": 0.7812, "step": 21393 }, { "epoch": 0.7556746685269483, "grad_norm": 1.584441065788269, "learning_rate": 1.485459361110304e-06, "loss": 0.7482, "step": 21394 }, { "epoch": 0.7557099903306562, "grad_norm": 1.7341537475585938, "learning_rate": 1.4850525245516363e-06, "loss": 0.7241, "step": 21395 }, { "epoch": 0.7557453121343641, "grad_norm": 1.8303908109664917, "learning_rate": 1.4846457339959385e-06, "loss": 0.7822, "step": 21396 }, { "epoch": 0.755780633938072, "grad_norm": 1.9033334255218506, "learning_rate": 1.4842389894485387e-06, "loss": 0.7895, "step": 21397 }, { "epoch": 0.75581595574178, "grad_norm": 1.5735371112823486, "learning_rate": 1.4838322909147563e-06, "loss": 0.7664, "step": 21398 }, { "epoch": 0.7558512775454879, "grad_norm": 1.5857267379760742, "learning_rate": 1.4834256383999157e-06, "loss": 0.7587, "step": 21399 }, { "epoch": 0.7558865993491958, "grad_norm": 1.7827345132827759, "learning_rate": 1.4830190319093406e-06, "loss": 0.783, "step": 21400 }, { "epoch": 0.7559219211529037, "grad_norm": 0.9368003606796265, "learning_rate": 1.4826124714483502e-06, "loss": 0.5721, "step": 21401 }, { "epoch": 0.7559572429566116, "grad_norm": 1.6501888036727905, "learning_rate": 1.4822059570222663e-06, "loss": 0.7632, "step": 21402 }, { "epoch": 0.7559925647603195, "grad_norm": 1.529376745223999, "learning_rate": 1.4817994886364101e-06, "loss": 0.7547, "step": 21403 }, { "epoch": 0.7560278865640274, "grad_norm": 1.6679257154464722, "learning_rate": 1.4813930662961017e-06, "loss": 0.7666, "step": 21404 }, { "epoch": 0.7560632083677353, "grad_norm": 1.6347482204437256, "learning_rate": 1.4809866900066582e-06, "loss": 0.7598, "step": 21405 }, { "epoch": 0.7560985301714432, "grad_norm": 1.6503866910934448, "learning_rate": 1.4805803597733998e-06, "loss": 0.7611, "step": 21406 }, { "epoch": 0.7561338519751511, "grad_norm": 1.724169373512268, "learning_rate": 1.4801740756016453e-06, "loss": 0.7675, "step": 21407 }, { "epoch": 0.7561691737788591, "grad_norm": 1.5285221338272095, "learning_rate": 1.479767837496709e-06, "loss": 0.7927, "step": 21408 }, { "epoch": 0.756204495582567, "grad_norm": 1.7394390106201172, "learning_rate": 1.4793616454639099e-06, "loss": 0.7467, "step": 21409 }, { "epoch": 0.7562398173862749, "grad_norm": 1.6852072477340698, "learning_rate": 1.4789554995085647e-06, "loss": 0.7642, "step": 21410 }, { "epoch": 0.7562751391899827, "grad_norm": 1.9545637369155884, "learning_rate": 1.4785493996359873e-06, "loss": 0.7835, "step": 21411 }, { "epoch": 0.7563104609936906, "grad_norm": 1.8253874778747559, "learning_rate": 1.4781433458514927e-06, "loss": 0.807, "step": 21412 }, { "epoch": 0.7563457827973985, "grad_norm": 1.648089051246643, "learning_rate": 1.4777373381603982e-06, "loss": 0.7814, "step": 21413 }, { "epoch": 0.7563811046011064, "grad_norm": 1.6730772256851196, "learning_rate": 1.4773313765680147e-06, "loss": 0.7677, "step": 21414 }, { "epoch": 0.7564164264048143, "grad_norm": 1.7266077995300293, "learning_rate": 1.4769254610796547e-06, "loss": 0.7501, "step": 21415 }, { "epoch": 0.7564517482085222, "grad_norm": 1.5697237253189087, "learning_rate": 1.476519591700632e-06, "loss": 0.7534, "step": 21416 }, { "epoch": 0.7564870700122301, "grad_norm": 1.6989657878875732, "learning_rate": 1.4761137684362598e-06, "loss": 0.7922, "step": 21417 }, { "epoch": 0.7565223918159381, "grad_norm": 1.7079757452011108, "learning_rate": 1.475707991291847e-06, "loss": 0.7654, "step": 21418 }, { "epoch": 0.756557713619646, "grad_norm": 1.594131588935852, "learning_rate": 1.4753022602727052e-06, "loss": 0.7645, "step": 21419 }, { "epoch": 0.7565930354233539, "grad_norm": 1.6474673748016357, "learning_rate": 1.4748965753841466e-06, "loss": 0.7701, "step": 21420 }, { "epoch": 0.7566283572270618, "grad_norm": 1.7182880640029907, "learning_rate": 1.4744909366314774e-06, "loss": 0.8219, "step": 21421 }, { "epoch": 0.7566636790307697, "grad_norm": 1.7051959037780762, "learning_rate": 1.4740853440200082e-06, "loss": 0.7625, "step": 21422 }, { "epoch": 0.7566990008344776, "grad_norm": 1.856732964515686, "learning_rate": 1.4736797975550487e-06, "loss": 0.7459, "step": 21423 }, { "epoch": 0.7567343226381855, "grad_norm": 2.7602663040161133, "learning_rate": 1.4732742972419035e-06, "loss": 0.7813, "step": 21424 }, { "epoch": 0.7567696444418934, "grad_norm": 3.0039782524108887, "learning_rate": 1.4728688430858818e-06, "loss": 0.7643, "step": 21425 }, { "epoch": 0.7568049662456013, "grad_norm": 1.7032181024551392, "learning_rate": 1.4724634350922907e-06, "loss": 0.767, "step": 21426 }, { "epoch": 0.7568402880493093, "grad_norm": 1.7147529125213623, "learning_rate": 1.4720580732664335e-06, "loss": 0.7417, "step": 21427 }, { "epoch": 0.7568756098530172, "grad_norm": 1.6023575067520142, "learning_rate": 1.471652757613617e-06, "loss": 0.7869, "step": 21428 }, { "epoch": 0.7569109316567251, "grad_norm": 1.7195003032684326, "learning_rate": 1.4712474881391475e-06, "loss": 0.7638, "step": 21429 }, { "epoch": 0.756946253460433, "grad_norm": 1.7260427474975586, "learning_rate": 1.4708422648483268e-06, "loss": 0.7665, "step": 21430 }, { "epoch": 0.7569815752641409, "grad_norm": 1.67532217502594, "learning_rate": 1.470437087746458e-06, "loss": 0.7967, "step": 21431 }, { "epoch": 0.7570168970678488, "grad_norm": 1.8810322284698486, "learning_rate": 1.4700319568388472e-06, "loss": 0.7316, "step": 21432 }, { "epoch": 0.7570522188715567, "grad_norm": 2.084564685821533, "learning_rate": 1.469626872130795e-06, "loss": 0.7442, "step": 21433 }, { "epoch": 0.7570875406752646, "grad_norm": 1.7598918676376343, "learning_rate": 1.4692218336276003e-06, "loss": 0.7755, "step": 21434 }, { "epoch": 0.7571228624789725, "grad_norm": 1.662726640701294, "learning_rate": 1.4688168413345666e-06, "loss": 0.7511, "step": 21435 }, { "epoch": 0.7571581842826804, "grad_norm": 1.7821577787399292, "learning_rate": 1.4684118952569964e-06, "loss": 0.7609, "step": 21436 }, { "epoch": 0.7571935060863882, "grad_norm": 1.8210334777832031, "learning_rate": 1.4680069954001858e-06, "loss": 0.7931, "step": 21437 }, { "epoch": 0.7572288278900962, "grad_norm": 2.073726177215576, "learning_rate": 1.4676021417694352e-06, "loss": 0.7903, "step": 21438 }, { "epoch": 0.7572641496938041, "grad_norm": 1.7109311819076538, "learning_rate": 1.4671973343700458e-06, "loss": 0.7306, "step": 21439 }, { "epoch": 0.757299471497512, "grad_norm": 1.7157306671142578, "learning_rate": 1.4667925732073119e-06, "loss": 0.7321, "step": 21440 }, { "epoch": 0.7573347933012199, "grad_norm": 1.5802628993988037, "learning_rate": 1.4663878582865327e-06, "loss": 0.7734, "step": 21441 }, { "epoch": 0.7573701151049278, "grad_norm": 1.566292643547058, "learning_rate": 1.4659831896130061e-06, "loss": 0.7488, "step": 21442 }, { "epoch": 0.7574054369086357, "grad_norm": 1.6520262956619263, "learning_rate": 1.4655785671920258e-06, "loss": 0.7547, "step": 21443 }, { "epoch": 0.7574407587123436, "grad_norm": 2.40724515914917, "learning_rate": 1.4651739910288893e-06, "loss": 0.7704, "step": 21444 }, { "epoch": 0.7574760805160515, "grad_norm": 1.7023125886917114, "learning_rate": 1.4647694611288922e-06, "loss": 0.7326, "step": 21445 }, { "epoch": 0.7575114023197594, "grad_norm": 1.7669037580490112, "learning_rate": 1.464364977497326e-06, "loss": 0.7661, "step": 21446 }, { "epoch": 0.7575467241234674, "grad_norm": 1.9157116413116455, "learning_rate": 1.4639605401394874e-06, "loss": 0.7941, "step": 21447 }, { "epoch": 0.7575820459271753, "grad_norm": 1.607059359550476, "learning_rate": 1.4635561490606693e-06, "loss": 0.7575, "step": 21448 }, { "epoch": 0.7576173677308832, "grad_norm": 2.1208102703094482, "learning_rate": 1.4631518042661624e-06, "loss": 0.7677, "step": 21449 }, { "epoch": 0.7576526895345911, "grad_norm": 1.6205133199691772, "learning_rate": 1.4627475057612595e-06, "loss": 0.7657, "step": 21450 }, { "epoch": 0.757688011338299, "grad_norm": 1.6002733707427979, "learning_rate": 1.4623432535512538e-06, "loss": 0.759, "step": 21451 }, { "epoch": 0.7577233331420069, "grad_norm": 2.650334596633911, "learning_rate": 1.4619390476414348e-06, "loss": 0.7697, "step": 21452 }, { "epoch": 0.7577586549457148, "grad_norm": 1.6705780029296875, "learning_rate": 1.4615348880370912e-06, "loss": 0.7382, "step": 21453 }, { "epoch": 0.7577939767494227, "grad_norm": 1.7569959163665771, "learning_rate": 1.4611307747435134e-06, "loss": 0.7736, "step": 21454 }, { "epoch": 0.7578292985531306, "grad_norm": 1.884864091873169, "learning_rate": 1.460726707765992e-06, "loss": 0.7633, "step": 21455 }, { "epoch": 0.7578646203568385, "grad_norm": 1.8652782440185547, "learning_rate": 1.460322687109813e-06, "loss": 0.7944, "step": 21456 }, { "epoch": 0.7578999421605465, "grad_norm": 1.6196919679641724, "learning_rate": 1.4599187127802655e-06, "loss": 0.7561, "step": 21457 }, { "epoch": 0.7579352639642544, "grad_norm": 1.8140692710876465, "learning_rate": 1.459514784782638e-06, "loss": 0.7873, "step": 21458 }, { "epoch": 0.7579705857679623, "grad_norm": 1.8349881172180176, "learning_rate": 1.4591109031222133e-06, "loss": 0.7747, "step": 21459 }, { "epoch": 0.7580059075716702, "grad_norm": 1.6498003005981445, "learning_rate": 1.4587070678042802e-06, "loss": 0.7204, "step": 21460 }, { "epoch": 0.7580412293753781, "grad_norm": 1.7459717988967896, "learning_rate": 1.4583032788341228e-06, "loss": 0.7592, "step": 21461 }, { "epoch": 0.758076551179086, "grad_norm": 1.9529014825820923, "learning_rate": 1.4578995362170283e-06, "loss": 0.7618, "step": 21462 }, { "epoch": 0.7581118729827939, "grad_norm": 1.5342397689819336, "learning_rate": 1.4574958399582766e-06, "loss": 0.7424, "step": 21463 }, { "epoch": 0.7581471947865017, "grad_norm": 1.71965491771698, "learning_rate": 1.457092190063154e-06, "loss": 0.7991, "step": 21464 }, { "epoch": 0.7581825165902096, "grad_norm": 1.6944130659103394, "learning_rate": 1.4566885865369446e-06, "loss": 0.7391, "step": 21465 }, { "epoch": 0.7582178383939175, "grad_norm": 1.8060184717178345, "learning_rate": 1.4562850293849269e-06, "loss": 0.7546, "step": 21466 }, { "epoch": 0.7582531601976255, "grad_norm": 1.9512931108474731, "learning_rate": 1.4558815186123843e-06, "loss": 0.7694, "step": 21467 }, { "epoch": 0.7582884820013334, "grad_norm": 1.781022071838379, "learning_rate": 1.4554780542246006e-06, "loss": 0.7807, "step": 21468 }, { "epoch": 0.7583238038050413, "grad_norm": 11.089824676513672, "learning_rate": 1.4550746362268532e-06, "loss": 0.78, "step": 21469 }, { "epoch": 0.7583591256087492, "grad_norm": 1.5190154314041138, "learning_rate": 1.454671264624421e-06, "loss": 0.7422, "step": 21470 }, { "epoch": 0.7583944474124571, "grad_norm": 1.6208747625350952, "learning_rate": 1.454267939422585e-06, "loss": 0.7562, "step": 21471 }, { "epoch": 0.758429769216165, "grad_norm": 1.7625665664672852, "learning_rate": 1.4538646606266255e-06, "loss": 0.7765, "step": 21472 }, { "epoch": 0.7584650910198729, "grad_norm": 2.004228353500366, "learning_rate": 1.4534614282418169e-06, "loss": 0.8104, "step": 21473 }, { "epoch": 0.7585004128235808, "grad_norm": 2.300940752029419, "learning_rate": 1.453058242273438e-06, "loss": 0.7506, "step": 21474 }, { "epoch": 0.7585357346272887, "grad_norm": 1.6245180368423462, "learning_rate": 1.4526551027267683e-06, "loss": 0.7513, "step": 21475 }, { "epoch": 0.7585710564309966, "grad_norm": 1.5642569065093994, "learning_rate": 1.4522520096070803e-06, "loss": 0.7619, "step": 21476 }, { "epoch": 0.7586063782347046, "grad_norm": 1.7855503559112549, "learning_rate": 1.4518489629196503e-06, "loss": 0.756, "step": 21477 }, { "epoch": 0.7586417000384125, "grad_norm": 1.7605061531066895, "learning_rate": 1.4514459626697564e-06, "loss": 0.7785, "step": 21478 }, { "epoch": 0.7586770218421204, "grad_norm": 1.87644624710083, "learning_rate": 1.4510430088626687e-06, "loss": 0.729, "step": 21479 }, { "epoch": 0.7587123436458283, "grad_norm": 1.663041591644287, "learning_rate": 1.4506401015036632e-06, "loss": 0.7858, "step": 21480 }, { "epoch": 0.7587476654495362, "grad_norm": 1.7684075832366943, "learning_rate": 1.4502372405980142e-06, "loss": 0.8001, "step": 21481 }, { "epoch": 0.7587829872532441, "grad_norm": 1.8535951375961304, "learning_rate": 1.4498344261509917e-06, "loss": 0.7538, "step": 21482 }, { "epoch": 0.758818309056952, "grad_norm": 1.680384635925293, "learning_rate": 1.4494316581678685e-06, "loss": 0.7776, "step": 21483 }, { "epoch": 0.7588536308606599, "grad_norm": 2.1730966567993164, "learning_rate": 1.4490289366539185e-06, "loss": 0.7502, "step": 21484 }, { "epoch": 0.7588889526643678, "grad_norm": 1.6018290519714355, "learning_rate": 1.4486262616144081e-06, "loss": 0.753, "step": 21485 }, { "epoch": 0.7589242744680758, "grad_norm": 1.7656241655349731, "learning_rate": 1.4482236330546095e-06, "loss": 0.7729, "step": 21486 }, { "epoch": 0.7589595962717837, "grad_norm": 1.4770280122756958, "learning_rate": 1.4478210509797946e-06, "loss": 0.7612, "step": 21487 }, { "epoch": 0.7589949180754916, "grad_norm": 1.5016928911209106, "learning_rate": 1.4474185153952296e-06, "loss": 0.759, "step": 21488 }, { "epoch": 0.7590302398791995, "grad_norm": 1.7909907102584839, "learning_rate": 1.4470160263061816e-06, "loss": 0.7557, "step": 21489 }, { "epoch": 0.7590655616829073, "grad_norm": 1.7297310829162598, "learning_rate": 1.4466135837179202e-06, "loss": 0.7636, "step": 21490 }, { "epoch": 0.7591008834866152, "grad_norm": 1.6352295875549316, "learning_rate": 1.4462111876357137e-06, "loss": 0.7744, "step": 21491 }, { "epoch": 0.7591362052903231, "grad_norm": 2.3592522144317627, "learning_rate": 1.4458088380648256e-06, "loss": 0.7939, "step": 21492 }, { "epoch": 0.759171527094031, "grad_norm": 1.7393348217010498, "learning_rate": 1.445406535010523e-06, "loss": 0.7659, "step": 21493 }, { "epoch": 0.7592068488977389, "grad_norm": 2.0775208473205566, "learning_rate": 1.4450042784780732e-06, "loss": 0.7406, "step": 21494 }, { "epoch": 0.7592421707014468, "grad_norm": 1.8219642639160156, "learning_rate": 1.444602068472738e-06, "loss": 0.7653, "step": 21495 }, { "epoch": 0.7592774925051547, "grad_norm": 1.759854793548584, "learning_rate": 1.4441999049997824e-06, "loss": 0.7421, "step": 21496 }, { "epoch": 0.7593128143088627, "grad_norm": 1.8397986888885498, "learning_rate": 1.443797788064471e-06, "loss": 0.7818, "step": 21497 }, { "epoch": 0.7593481361125706, "grad_norm": 1.8113977909088135, "learning_rate": 1.4433957176720648e-06, "loss": 0.7638, "step": 21498 }, { "epoch": 0.7593834579162785, "grad_norm": 1.7506446838378906, "learning_rate": 1.4429936938278266e-06, "loss": 0.794, "step": 21499 }, { "epoch": 0.7594187797199864, "grad_norm": 1.6245909929275513, "learning_rate": 1.4425917165370201e-06, "loss": 0.7323, "step": 21500 }, { "epoch": 0.7594541015236943, "grad_norm": 1.7263095378875732, "learning_rate": 1.4421897858049028e-06, "loss": 0.7604, "step": 21501 }, { "epoch": 0.7594894233274022, "grad_norm": 1.625576376914978, "learning_rate": 1.4417879016367364e-06, "loss": 0.7483, "step": 21502 }, { "epoch": 0.7595247451311101, "grad_norm": 1.7875046730041504, "learning_rate": 1.441386064037783e-06, "loss": 0.7536, "step": 21503 }, { "epoch": 0.759560066934818, "grad_norm": 1.7868428230285645, "learning_rate": 1.4409842730132983e-06, "loss": 0.8039, "step": 21504 }, { "epoch": 0.759595388738526, "grad_norm": 1.6918538808822632, "learning_rate": 1.440582528568542e-06, "loss": 0.7514, "step": 21505 }, { "epoch": 0.7596307105422339, "grad_norm": 1.868628978729248, "learning_rate": 1.4401808307087744e-06, "loss": 0.7575, "step": 21506 }, { "epoch": 0.7596660323459418, "grad_norm": 1.801370620727539, "learning_rate": 1.4397791794392513e-06, "loss": 0.7911, "step": 21507 }, { "epoch": 0.7597013541496497, "grad_norm": 1.593145489692688, "learning_rate": 1.4393775747652272e-06, "loss": 0.7869, "step": 21508 }, { "epoch": 0.7597366759533576, "grad_norm": 1.635585069656372, "learning_rate": 1.4389760166919598e-06, "loss": 0.7597, "step": 21509 }, { "epoch": 0.7597719977570655, "grad_norm": 1.8876042366027832, "learning_rate": 1.4385745052247062e-06, "loss": 0.7613, "step": 21510 }, { "epoch": 0.7598073195607734, "grad_norm": 1.655007004737854, "learning_rate": 1.4381730403687195e-06, "loss": 0.7689, "step": 21511 }, { "epoch": 0.7598426413644813, "grad_norm": 1.6011046171188354, "learning_rate": 1.4377716221292536e-06, "loss": 0.7507, "step": 21512 }, { "epoch": 0.7598779631681892, "grad_norm": 1.6700608730316162, "learning_rate": 1.437370250511565e-06, "loss": 0.7603, "step": 21513 }, { "epoch": 0.7599132849718971, "grad_norm": 1.611796498298645, "learning_rate": 1.436968925520903e-06, "loss": 0.7351, "step": 21514 }, { "epoch": 0.759948606775605, "grad_norm": 1.783019781112671, "learning_rate": 1.4365676471625223e-06, "loss": 0.7561, "step": 21515 }, { "epoch": 0.7599839285793129, "grad_norm": 1.5805246829986572, "learning_rate": 1.4361664154416755e-06, "loss": 0.7634, "step": 21516 }, { "epoch": 0.7600192503830208, "grad_norm": 1.7622778415679932, "learning_rate": 1.4357652303636115e-06, "loss": 0.7772, "step": 21517 }, { "epoch": 0.7600545721867287, "grad_norm": 1.7715933322906494, "learning_rate": 1.435364091933582e-06, "loss": 0.8161, "step": 21518 }, { "epoch": 0.7600898939904366, "grad_norm": 1.6485666036605835, "learning_rate": 1.4349630001568377e-06, "loss": 0.7679, "step": 21519 }, { "epoch": 0.7601252157941445, "grad_norm": 1.7014219760894775, "learning_rate": 1.4345619550386287e-06, "loss": 0.7657, "step": 21520 }, { "epoch": 0.7601605375978524, "grad_norm": 1.8830978870391846, "learning_rate": 1.4341609565842007e-06, "loss": 0.7656, "step": 21521 }, { "epoch": 0.7601958594015603, "grad_norm": 1.6874346733093262, "learning_rate": 1.4337600047988043e-06, "loss": 0.7636, "step": 21522 }, { "epoch": 0.7602311812052682, "grad_norm": 1.8996158838272095, "learning_rate": 1.433359099687688e-06, "loss": 0.7891, "step": 21523 }, { "epoch": 0.7602665030089761, "grad_norm": 1.7491722106933594, "learning_rate": 1.4329582412560954e-06, "loss": 0.7696, "step": 21524 }, { "epoch": 0.760301824812684, "grad_norm": 5.69916296005249, "learning_rate": 1.4325574295092766e-06, "loss": 0.7369, "step": 21525 }, { "epoch": 0.760337146616392, "grad_norm": 1.6946982145309448, "learning_rate": 1.4321566644524758e-06, "loss": 0.793, "step": 21526 }, { "epoch": 0.7603724684200999, "grad_norm": 2.0082101821899414, "learning_rate": 1.4317559460909358e-06, "loss": 0.8031, "step": 21527 }, { "epoch": 0.7604077902238078, "grad_norm": 1.7176549434661865, "learning_rate": 1.4313552744299037e-06, "loss": 0.7507, "step": 21528 }, { "epoch": 0.7604431120275157, "grad_norm": 1.742813229560852, "learning_rate": 1.430954649474623e-06, "loss": 0.8186, "step": 21529 }, { "epoch": 0.7604784338312236, "grad_norm": 0.9179311394691467, "learning_rate": 1.4305540712303384e-06, "loss": 0.5888, "step": 21530 }, { "epoch": 0.7605137556349315, "grad_norm": 1.7942034006118774, "learning_rate": 1.4301535397022892e-06, "loss": 0.739, "step": 21531 }, { "epoch": 0.7605490774386394, "grad_norm": 1.7227948904037476, "learning_rate": 1.42975305489572e-06, "loss": 0.7791, "step": 21532 }, { "epoch": 0.7605843992423473, "grad_norm": 1.5554124116897583, "learning_rate": 1.4293526168158728e-06, "loss": 0.7544, "step": 21533 }, { "epoch": 0.7606197210460552, "grad_norm": 1.718492031097412, "learning_rate": 1.4289522254679861e-06, "loss": 0.8128, "step": 21534 }, { "epoch": 0.7606550428497632, "grad_norm": 1.6382094621658325, "learning_rate": 1.4285518808573012e-06, "loss": 0.7657, "step": 21535 }, { "epoch": 0.7606903646534711, "grad_norm": 1.7986315488815308, "learning_rate": 1.4281515829890597e-06, "loss": 0.7956, "step": 21536 }, { "epoch": 0.760725686457179, "grad_norm": 1.8471927642822266, "learning_rate": 1.4277513318684966e-06, "loss": 0.7799, "step": 21537 }, { "epoch": 0.7607610082608869, "grad_norm": 1.785759449005127, "learning_rate": 1.4273511275008533e-06, "loss": 0.7553, "step": 21538 }, { "epoch": 0.7607963300645948, "grad_norm": 1.7085936069488525, "learning_rate": 1.426950969891368e-06, "loss": 0.7649, "step": 21539 }, { "epoch": 0.7608316518683027, "grad_norm": 1.64336097240448, "learning_rate": 1.4265508590452754e-06, "loss": 0.7577, "step": 21540 }, { "epoch": 0.7608669736720106, "grad_norm": 1.7717806100845337, "learning_rate": 1.4261507949678133e-06, "loss": 0.7829, "step": 21541 }, { "epoch": 0.7609022954757184, "grad_norm": 1.5616213083267212, "learning_rate": 1.42575077766422e-06, "loss": 0.7636, "step": 21542 }, { "epoch": 0.7609376172794263, "grad_norm": 1.6539782285690308, "learning_rate": 1.4253508071397277e-06, "loss": 0.765, "step": 21543 }, { "epoch": 0.7609729390831342, "grad_norm": 1.8507065773010254, "learning_rate": 1.4249508833995706e-06, "loss": 0.7528, "step": 21544 }, { "epoch": 0.7610082608868421, "grad_norm": 1.6121171712875366, "learning_rate": 1.4245510064489849e-06, "loss": 0.7694, "step": 21545 }, { "epoch": 0.7610435826905501, "grad_norm": 1.5092997550964355, "learning_rate": 1.4241511762932047e-06, "loss": 0.7501, "step": 21546 }, { "epoch": 0.761078904494258, "grad_norm": 1.5388396978378296, "learning_rate": 1.4237513929374602e-06, "loss": 0.7489, "step": 21547 }, { "epoch": 0.7611142262979659, "grad_norm": 1.7729724645614624, "learning_rate": 1.4233516563869854e-06, "loss": 0.7983, "step": 21548 }, { "epoch": 0.7611495481016738, "grad_norm": 1.5878331661224365, "learning_rate": 1.4229519666470131e-06, "loss": 0.7869, "step": 21549 }, { "epoch": 0.7611848699053817, "grad_norm": 1.6235744953155518, "learning_rate": 1.4225523237227718e-06, "loss": 0.7684, "step": 21550 }, { "epoch": 0.7612201917090896, "grad_norm": 1.6794170141220093, "learning_rate": 1.422152727619493e-06, "loss": 0.769, "step": 21551 }, { "epoch": 0.7612555135127975, "grad_norm": 1.7935062646865845, "learning_rate": 1.4217531783424083e-06, "loss": 0.802, "step": 21552 }, { "epoch": 0.7612908353165054, "grad_norm": 1.7766577005386353, "learning_rate": 1.4213536758967432e-06, "loss": 0.7904, "step": 21553 }, { "epoch": 0.7613261571202133, "grad_norm": 1.6600981950759888, "learning_rate": 1.420954220287729e-06, "loss": 0.7722, "step": 21554 }, { "epoch": 0.7613614789239213, "grad_norm": 1.5912572145462036, "learning_rate": 1.4205548115205952e-06, "loss": 0.742, "step": 21555 }, { "epoch": 0.7613968007276292, "grad_norm": 1.6462424993515015, "learning_rate": 1.4201554496005648e-06, "loss": 0.7463, "step": 21556 }, { "epoch": 0.7614321225313371, "grad_norm": 1.5938572883605957, "learning_rate": 1.4197561345328676e-06, "loss": 0.7883, "step": 21557 }, { "epoch": 0.761467444335045, "grad_norm": 1.7821952104568481, "learning_rate": 1.4193568663227303e-06, "loss": 0.785, "step": 21558 }, { "epoch": 0.7615027661387529, "grad_norm": 1.5454193353652954, "learning_rate": 1.4189576449753756e-06, "loss": 0.7723, "step": 21559 }, { "epoch": 0.7615380879424608, "grad_norm": 1.7474377155303955, "learning_rate": 1.4185584704960303e-06, "loss": 0.7496, "step": 21560 }, { "epoch": 0.7615734097461687, "grad_norm": 1.809154748916626, "learning_rate": 1.4181593428899205e-06, "loss": 0.7815, "step": 21561 }, { "epoch": 0.7616087315498766, "grad_norm": 1.730168104171753, "learning_rate": 1.4177602621622672e-06, "loss": 0.7627, "step": 21562 }, { "epoch": 0.7616440533535845, "grad_norm": 2.4066500663757324, "learning_rate": 1.4173612283182931e-06, "loss": 0.751, "step": 21563 }, { "epoch": 0.7616793751572924, "grad_norm": 1.7157859802246094, "learning_rate": 1.4169622413632212e-06, "loss": 0.7676, "step": 21564 }, { "epoch": 0.7617146969610004, "grad_norm": 1.899766445159912, "learning_rate": 1.416563301302276e-06, "loss": 0.8096, "step": 21565 }, { "epoch": 0.7617500187647083, "grad_norm": 1.622188925743103, "learning_rate": 1.416164408140675e-06, "loss": 0.7628, "step": 21566 }, { "epoch": 0.7617853405684162, "grad_norm": 1.7909126281738281, "learning_rate": 1.4157655618836402e-06, "loss": 0.7531, "step": 21567 }, { "epoch": 0.761820662372124, "grad_norm": 1.7972959280014038, "learning_rate": 1.4153667625363944e-06, "loss": 0.7761, "step": 21568 }, { "epoch": 0.7618559841758319, "grad_norm": 1.622902512550354, "learning_rate": 1.4149680101041524e-06, "loss": 0.7443, "step": 21569 }, { "epoch": 0.7618913059795398, "grad_norm": 1.6101844310760498, "learning_rate": 1.414569304592135e-06, "loss": 0.7377, "step": 21570 }, { "epoch": 0.7619266277832477, "grad_norm": 1.739865779876709, "learning_rate": 1.414170646005562e-06, "loss": 0.7656, "step": 21571 }, { "epoch": 0.7619619495869556, "grad_norm": 1.9157483577728271, "learning_rate": 1.4137720343496485e-06, "loss": 0.7506, "step": 21572 }, { "epoch": 0.7619972713906635, "grad_norm": 1.8052929639816284, "learning_rate": 1.4133734696296126e-06, "loss": 0.8005, "step": 21573 }, { "epoch": 0.7620325931943714, "grad_norm": 1.8457854986190796, "learning_rate": 1.4129749518506713e-06, "loss": 0.7574, "step": 21574 }, { "epoch": 0.7620679149980794, "grad_norm": 1.99285089969635, "learning_rate": 1.4125764810180386e-06, "loss": 0.8127, "step": 21575 }, { "epoch": 0.7621032368017873, "grad_norm": 1.6539556980133057, "learning_rate": 1.4121780571369304e-06, "loss": 0.7706, "step": 21576 }, { "epoch": 0.7621385586054952, "grad_norm": 1.8053990602493286, "learning_rate": 1.4117796802125616e-06, "loss": 0.7461, "step": 21577 }, { "epoch": 0.7621738804092031, "grad_norm": 1.627444863319397, "learning_rate": 1.4113813502501473e-06, "loss": 0.7565, "step": 21578 }, { "epoch": 0.762209202212911, "grad_norm": 1.5800585746765137, "learning_rate": 1.4109830672548974e-06, "loss": 0.7674, "step": 21579 }, { "epoch": 0.7622445240166189, "grad_norm": 1.8027926683425903, "learning_rate": 1.410584831232028e-06, "loss": 0.7788, "step": 21580 }, { "epoch": 0.7622798458203268, "grad_norm": 1.782273292541504, "learning_rate": 1.41018664218675e-06, "loss": 0.7618, "step": 21581 }, { "epoch": 0.7623151676240347, "grad_norm": 3.106006383895874, "learning_rate": 1.409788500124273e-06, "loss": 0.7606, "step": 21582 }, { "epoch": 0.7623504894277426, "grad_norm": 1.6592096090316772, "learning_rate": 1.4093904050498085e-06, "loss": 0.7656, "step": 21583 }, { "epoch": 0.7623858112314505, "grad_norm": 1.6140016317367554, "learning_rate": 1.4089923569685692e-06, "loss": 0.748, "step": 21584 }, { "epoch": 0.7624211330351585, "grad_norm": 1.543765664100647, "learning_rate": 1.4085943558857617e-06, "loss": 0.7679, "step": 21585 }, { "epoch": 0.7624564548388664, "grad_norm": 2.0759806632995605, "learning_rate": 1.4081964018065957e-06, "loss": 0.76, "step": 21586 }, { "epoch": 0.7624917766425743, "grad_norm": 1.7398128509521484, "learning_rate": 1.4077984947362806e-06, "loss": 0.7851, "step": 21587 }, { "epoch": 0.7625270984462822, "grad_norm": 1.8720098733901978, "learning_rate": 1.4074006346800246e-06, "loss": 0.7405, "step": 21588 }, { "epoch": 0.7625624202499901, "grad_norm": 1.6304309368133545, "learning_rate": 1.4070028216430327e-06, "loss": 0.7671, "step": 21589 }, { "epoch": 0.762597742053698, "grad_norm": 1.73465096950531, "learning_rate": 1.4066050556305117e-06, "loss": 0.7963, "step": 21590 }, { "epoch": 0.7626330638574059, "grad_norm": 2.8796300888061523, "learning_rate": 1.4062073366476703e-06, "loss": 0.7718, "step": 21591 }, { "epoch": 0.7626683856611138, "grad_norm": 1.6931030750274658, "learning_rate": 1.4058096646997105e-06, "loss": 0.7763, "step": 21592 }, { "epoch": 0.7627037074648217, "grad_norm": 2.157865524291992, "learning_rate": 1.4054120397918374e-06, "loss": 0.784, "step": 21593 }, { "epoch": 0.7627390292685295, "grad_norm": 1.6400057077407837, "learning_rate": 1.4050144619292578e-06, "loss": 0.7425, "step": 21594 }, { "epoch": 0.7627743510722375, "grad_norm": 1.8271257877349854, "learning_rate": 1.4046169311171715e-06, "loss": 0.7701, "step": 21595 }, { "epoch": 0.7628096728759454, "grad_norm": 1.6889060735702515, "learning_rate": 1.4042194473607828e-06, "loss": 0.7778, "step": 21596 }, { "epoch": 0.7628449946796533, "grad_norm": 1.924464225769043, "learning_rate": 1.4038220106652961e-06, "loss": 0.7568, "step": 21597 }, { "epoch": 0.7628803164833612, "grad_norm": 1.7089446783065796, "learning_rate": 1.4034246210359087e-06, "loss": 0.7673, "step": 21598 }, { "epoch": 0.7629156382870691, "grad_norm": 1.8162537813186646, "learning_rate": 1.403027278477826e-06, "loss": 0.7599, "step": 21599 }, { "epoch": 0.762950960090777, "grad_norm": 1.6499407291412354, "learning_rate": 1.4026299829962442e-06, "loss": 0.766, "step": 21600 }, { "epoch": 0.7629862818944849, "grad_norm": 1.5574558973312378, "learning_rate": 1.4022327345963665e-06, "loss": 0.7609, "step": 21601 }, { "epoch": 0.7630216036981928, "grad_norm": 1.9871054887771606, "learning_rate": 1.4018355332833887e-06, "loss": 0.7472, "step": 21602 }, { "epoch": 0.7630569255019007, "grad_norm": 1.664331316947937, "learning_rate": 1.4014383790625113e-06, "loss": 0.7809, "step": 21603 }, { "epoch": 0.7630922473056087, "grad_norm": 3.210149049758911, "learning_rate": 1.4010412719389338e-06, "loss": 0.7561, "step": 21604 }, { "epoch": 0.7631275691093166, "grad_norm": 1.759919285774231, "learning_rate": 1.4006442119178497e-06, "loss": 0.7474, "step": 21605 }, { "epoch": 0.7631628909130245, "grad_norm": 1.7552906274795532, "learning_rate": 1.4002471990044575e-06, "loss": 0.7474, "step": 21606 }, { "epoch": 0.7631982127167324, "grad_norm": 1.7965832948684692, "learning_rate": 1.3998502332039549e-06, "loss": 0.7568, "step": 21607 }, { "epoch": 0.7632335345204403, "grad_norm": 1.6143450736999512, "learning_rate": 1.3994533145215344e-06, "loss": 0.7495, "step": 21608 }, { "epoch": 0.7632688563241482, "grad_norm": 1.6431487798690796, "learning_rate": 1.3990564429623915e-06, "loss": 0.758, "step": 21609 }, { "epoch": 0.7633041781278561, "grad_norm": 2.428107500076294, "learning_rate": 1.3986596185317231e-06, "loss": 0.7581, "step": 21610 }, { "epoch": 0.763339499931564, "grad_norm": 1.695932388305664, "learning_rate": 1.3982628412347188e-06, "loss": 0.7604, "step": 21611 }, { "epoch": 0.7633748217352719, "grad_norm": 1.6808058023452759, "learning_rate": 1.3978661110765735e-06, "loss": 0.7546, "step": 21612 }, { "epoch": 0.7634101435389798, "grad_norm": 1.6831717491149902, "learning_rate": 1.3974694280624806e-06, "loss": 0.8094, "step": 21613 }, { "epoch": 0.7634454653426878, "grad_norm": 1.7745702266693115, "learning_rate": 1.3970727921976296e-06, "loss": 0.7791, "step": 21614 }, { "epoch": 0.7634807871463957, "grad_norm": 1.853880763053894, "learning_rate": 1.3966762034872122e-06, "loss": 0.7768, "step": 21615 }, { "epoch": 0.7635161089501036, "grad_norm": 1.9255462884902954, "learning_rate": 1.396279661936421e-06, "loss": 0.7566, "step": 21616 }, { "epoch": 0.7635514307538115, "grad_norm": 1.6167908906936646, "learning_rate": 1.3958831675504447e-06, "loss": 0.7484, "step": 21617 }, { "epoch": 0.7635867525575194, "grad_norm": 1.6273751258850098, "learning_rate": 1.3954867203344697e-06, "loss": 0.7643, "step": 21618 }, { "epoch": 0.7636220743612273, "grad_norm": 1.5896552801132202, "learning_rate": 1.395090320293687e-06, "loss": 0.738, "step": 21619 }, { "epoch": 0.7636573961649351, "grad_norm": 1.7067950963974, "learning_rate": 1.3946939674332866e-06, "loss": 0.7455, "step": 21620 }, { "epoch": 0.763692717968643, "grad_norm": 1.6944167613983154, "learning_rate": 1.394297661758452e-06, "loss": 0.7923, "step": 21621 }, { "epoch": 0.7637280397723509, "grad_norm": 1.6627477407455444, "learning_rate": 1.3939014032743714e-06, "loss": 0.7386, "step": 21622 }, { "epoch": 0.7637633615760588, "grad_norm": 1.7694711685180664, "learning_rate": 1.3935051919862337e-06, "loss": 0.7776, "step": 21623 }, { "epoch": 0.7637986833797668, "grad_norm": 2.454366683959961, "learning_rate": 1.3931090278992198e-06, "loss": 0.7845, "step": 21624 }, { "epoch": 0.7638340051834747, "grad_norm": 3.2488505840301514, "learning_rate": 1.3927129110185172e-06, "loss": 0.7682, "step": 21625 }, { "epoch": 0.7638693269871826, "grad_norm": 1.6276369094848633, "learning_rate": 1.3923168413493115e-06, "loss": 0.7945, "step": 21626 }, { "epoch": 0.7639046487908905, "grad_norm": 1.6088645458221436, "learning_rate": 1.3919208188967837e-06, "loss": 0.7734, "step": 21627 }, { "epoch": 0.7639399705945984, "grad_norm": 1.9519131183624268, "learning_rate": 1.3915248436661172e-06, "loss": 0.7941, "step": 21628 }, { "epoch": 0.7639752923983063, "grad_norm": 1.668731689453125, "learning_rate": 1.3911289156624974e-06, "loss": 0.749, "step": 21629 }, { "epoch": 0.7640106142020142, "grad_norm": 1.6246105432510376, "learning_rate": 1.3907330348911018e-06, "loss": 0.7368, "step": 21630 }, { "epoch": 0.7640459360057221, "grad_norm": 1.55767822265625, "learning_rate": 1.3903372013571142e-06, "loss": 0.7609, "step": 21631 }, { "epoch": 0.76408125780943, "grad_norm": 1.703104853630066, "learning_rate": 1.3899414150657164e-06, "loss": 0.7704, "step": 21632 }, { "epoch": 0.764116579613138, "grad_norm": 1.6691142320632935, "learning_rate": 1.389545676022085e-06, "loss": 0.747, "step": 21633 }, { "epoch": 0.7641519014168459, "grad_norm": 1.7465569972991943, "learning_rate": 1.389149984231401e-06, "loss": 0.7735, "step": 21634 }, { "epoch": 0.7641872232205538, "grad_norm": 1.6924771070480347, "learning_rate": 1.3887543396988435e-06, "loss": 0.7753, "step": 21635 }, { "epoch": 0.7642225450242617, "grad_norm": 1.582077145576477, "learning_rate": 1.3883587424295935e-06, "loss": 0.7357, "step": 21636 }, { "epoch": 0.7642578668279696, "grad_norm": 1.6244944334030151, "learning_rate": 1.3879631924288216e-06, "loss": 0.7236, "step": 21637 }, { "epoch": 0.7642931886316775, "grad_norm": 4.087419509887695, "learning_rate": 1.3875676897017093e-06, "loss": 0.7904, "step": 21638 }, { "epoch": 0.7643285104353854, "grad_norm": 1.7978235483169556, "learning_rate": 1.3871722342534332e-06, "loss": 0.7885, "step": 21639 }, { "epoch": 0.7643638322390933, "grad_norm": 1.959293246269226, "learning_rate": 1.3867768260891661e-06, "loss": 0.7818, "step": 21640 }, { "epoch": 0.7643991540428012, "grad_norm": 1.8059443235397339, "learning_rate": 1.386381465214085e-06, "loss": 0.7831, "step": 21641 }, { "epoch": 0.7644344758465091, "grad_norm": 1.9137861728668213, "learning_rate": 1.385986151633365e-06, "loss": 0.7686, "step": 21642 }, { "epoch": 0.764469797650217, "grad_norm": 1.5884766578674316, "learning_rate": 1.3855908853521783e-06, "loss": 0.7548, "step": 21643 }, { "epoch": 0.764505119453925, "grad_norm": 1.8924976587295532, "learning_rate": 1.3851956663756977e-06, "loss": 0.7655, "step": 21644 }, { "epoch": 0.7645404412576329, "grad_norm": 1.6648238897323608, "learning_rate": 1.384800494709097e-06, "loss": 0.7787, "step": 21645 }, { "epoch": 0.7645757630613407, "grad_norm": 1.5655168294906616, "learning_rate": 1.3844053703575494e-06, "loss": 0.7927, "step": 21646 }, { "epoch": 0.7646110848650486, "grad_norm": 1.655003547668457, "learning_rate": 1.3840102933262235e-06, "loss": 0.7427, "step": 21647 }, { "epoch": 0.7646464066687565, "grad_norm": 1.709741234779358, "learning_rate": 1.3836152636202905e-06, "loss": 0.7516, "step": 21648 }, { "epoch": 0.7646817284724644, "grad_norm": 1.7749104499816895, "learning_rate": 1.383220281244923e-06, "loss": 0.7937, "step": 21649 }, { "epoch": 0.7647170502761723, "grad_norm": 0.9644708633422852, "learning_rate": 1.382825346205287e-06, "loss": 0.5542, "step": 21650 }, { "epoch": 0.7647523720798802, "grad_norm": 1.858386754989624, "learning_rate": 1.3824304585065529e-06, "loss": 0.779, "step": 21651 }, { "epoch": 0.7647876938835881, "grad_norm": 1.7358266115188599, "learning_rate": 1.3820356181538907e-06, "loss": 0.7335, "step": 21652 }, { "epoch": 0.764823015687296, "grad_norm": 1.6845535039901733, "learning_rate": 1.3816408251524642e-06, "loss": 0.7785, "step": 21653 }, { "epoch": 0.764858337491004, "grad_norm": 1.7417945861816406, "learning_rate": 1.3812460795074445e-06, "loss": 0.7725, "step": 21654 }, { "epoch": 0.7648936592947119, "grad_norm": 1.6917492151260376, "learning_rate": 1.3808513812239937e-06, "loss": 0.7364, "step": 21655 }, { "epoch": 0.7649289810984198, "grad_norm": 2.2355785369873047, "learning_rate": 1.3804567303072819e-06, "loss": 0.7732, "step": 21656 }, { "epoch": 0.7649643029021277, "grad_norm": 1.7549293041229248, "learning_rate": 1.3800621267624702e-06, "loss": 0.7787, "step": 21657 }, { "epoch": 0.7649996247058356, "grad_norm": 1.912550687789917, "learning_rate": 1.379667570594725e-06, "loss": 0.7613, "step": 21658 }, { "epoch": 0.7650349465095435, "grad_norm": 1.7551701068878174, "learning_rate": 1.3792730618092115e-06, "loss": 0.7971, "step": 21659 }, { "epoch": 0.7650702683132514, "grad_norm": 1.7395988702774048, "learning_rate": 1.37887860041109e-06, "loss": 0.744, "step": 21660 }, { "epoch": 0.7651055901169593, "grad_norm": 1.7639009952545166, "learning_rate": 1.3784841864055243e-06, "loss": 0.8365, "step": 21661 }, { "epoch": 0.7651409119206672, "grad_norm": 1.8143069744110107, "learning_rate": 1.3780898197976789e-06, "loss": 0.7877, "step": 21662 }, { "epoch": 0.7651762337243752, "grad_norm": 1.88116455078125, "learning_rate": 1.3776955005927112e-06, "loss": 0.7574, "step": 21663 }, { "epoch": 0.7652115555280831, "grad_norm": 1.8073594570159912, "learning_rate": 1.3773012287957844e-06, "loss": 0.7494, "step": 21664 }, { "epoch": 0.765246877331791, "grad_norm": 1.699638843536377, "learning_rate": 1.3769070044120592e-06, "loss": 0.7568, "step": 21665 }, { "epoch": 0.7652821991354989, "grad_norm": 1.7841837406158447, "learning_rate": 1.3765128274466927e-06, "loss": 0.7772, "step": 21666 }, { "epoch": 0.7653175209392068, "grad_norm": 1.6358423233032227, "learning_rate": 1.3761186979048446e-06, "loss": 0.757, "step": 21667 }, { "epoch": 0.7653528427429147, "grad_norm": 1.5933200120925903, "learning_rate": 1.375724615791676e-06, "loss": 0.7957, "step": 21668 }, { "epoch": 0.7653881645466226, "grad_norm": 1.6326961517333984, "learning_rate": 1.3753305811123408e-06, "loss": 0.7633, "step": 21669 }, { "epoch": 0.7654234863503305, "grad_norm": 1.6554204225540161, "learning_rate": 1.3749365938719972e-06, "loss": 0.7686, "step": 21670 }, { "epoch": 0.7654588081540384, "grad_norm": 1.68815279006958, "learning_rate": 1.3745426540758038e-06, "loss": 0.7737, "step": 21671 }, { "epoch": 0.7654941299577462, "grad_norm": 2.1037659645080566, "learning_rate": 1.3741487617289145e-06, "loss": 0.791, "step": 21672 }, { "epoch": 0.7655294517614541, "grad_norm": 1.7924377918243408, "learning_rate": 1.3737549168364834e-06, "loss": 0.7644, "step": 21673 }, { "epoch": 0.7655647735651621, "grad_norm": 1.5631643533706665, "learning_rate": 1.3733611194036662e-06, "loss": 0.768, "step": 21674 }, { "epoch": 0.76560009536887, "grad_norm": 1.5443438291549683, "learning_rate": 1.3729673694356182e-06, "loss": 0.7521, "step": 21675 }, { "epoch": 0.7656354171725779, "grad_norm": 1.731661319732666, "learning_rate": 1.3725736669374901e-06, "loss": 0.7763, "step": 21676 }, { "epoch": 0.7656707389762858, "grad_norm": 1.93068265914917, "learning_rate": 1.3721800119144358e-06, "loss": 0.7439, "step": 21677 }, { "epoch": 0.7657060607799937, "grad_norm": 1.689121961593628, "learning_rate": 1.3717864043716094e-06, "loss": 0.7485, "step": 21678 }, { "epoch": 0.7657413825837016, "grad_norm": 1.7338793277740479, "learning_rate": 1.3713928443141588e-06, "loss": 0.7598, "step": 21679 }, { "epoch": 0.7657767043874095, "grad_norm": 1.7554336786270142, "learning_rate": 1.3709993317472365e-06, "loss": 0.7512, "step": 21680 }, { "epoch": 0.7658120261911174, "grad_norm": 0.8511718511581421, "learning_rate": 1.370605866675994e-06, "loss": 0.5377, "step": 21681 }, { "epoch": 0.7658473479948253, "grad_norm": 1.5938568115234375, "learning_rate": 1.3702124491055784e-06, "loss": 0.7671, "step": 21682 }, { "epoch": 0.7658826697985333, "grad_norm": 1.6932272911071777, "learning_rate": 1.3698190790411402e-06, "loss": 0.7591, "step": 21683 }, { "epoch": 0.7659179916022412, "grad_norm": 1.7747952938079834, "learning_rate": 1.3694257564878287e-06, "loss": 0.7361, "step": 21684 }, { "epoch": 0.7659533134059491, "grad_norm": 1.690449595451355, "learning_rate": 1.3690324814507894e-06, "loss": 0.8083, "step": 21685 }, { "epoch": 0.765988635209657, "grad_norm": 1.562005639076233, "learning_rate": 1.36863925393517e-06, "loss": 0.7516, "step": 21686 }, { "epoch": 0.7660239570133649, "grad_norm": 2.0290842056274414, "learning_rate": 1.368246073946119e-06, "loss": 0.8005, "step": 21687 }, { "epoch": 0.7660592788170728, "grad_norm": 1.7885489463806152, "learning_rate": 1.3678529414887792e-06, "loss": 0.8088, "step": 21688 }, { "epoch": 0.7660946006207807, "grad_norm": 1.6084611415863037, "learning_rate": 1.3674598565682974e-06, "loss": 0.7325, "step": 21689 }, { "epoch": 0.7661299224244886, "grad_norm": 1.7299726009368896, "learning_rate": 1.3670668191898195e-06, "loss": 0.7995, "step": 21690 }, { "epoch": 0.7661652442281965, "grad_norm": 2.0720229148864746, "learning_rate": 1.366673829358488e-06, "loss": 0.7586, "step": 21691 }, { "epoch": 0.7662005660319045, "grad_norm": 1.6578054428100586, "learning_rate": 1.3662808870794453e-06, "loss": 0.7536, "step": 21692 }, { "epoch": 0.7662358878356124, "grad_norm": 1.580795168876648, "learning_rate": 1.365887992357835e-06, "loss": 0.7398, "step": 21693 }, { "epoch": 0.7662712096393203, "grad_norm": 1.6541757583618164, "learning_rate": 1.3654951451988013e-06, "loss": 0.7935, "step": 21694 }, { "epoch": 0.7663065314430282, "grad_norm": 1.6032726764678955, "learning_rate": 1.3651023456074825e-06, "loss": 0.7482, "step": 21695 }, { "epoch": 0.7663418532467361, "grad_norm": 1.7415395975112915, "learning_rate": 1.3647095935890204e-06, "loss": 0.8065, "step": 21696 }, { "epoch": 0.766377175050444, "grad_norm": 1.9159135818481445, "learning_rate": 1.3643168891485575e-06, "loss": 0.7498, "step": 21697 }, { "epoch": 0.7664124968541518, "grad_norm": 1.6508921384811401, "learning_rate": 1.3639242322912305e-06, "loss": 0.7145, "step": 21698 }, { "epoch": 0.7664478186578597, "grad_norm": 1.5572694540023804, "learning_rate": 1.3635316230221796e-06, "loss": 0.7553, "step": 21699 }, { "epoch": 0.7664831404615676, "grad_norm": 0.9870365262031555, "learning_rate": 1.3631390613465445e-06, "loss": 0.5948, "step": 21700 }, { "epoch": 0.7665184622652755, "grad_norm": 1.666320562362671, "learning_rate": 1.36274654726946e-06, "loss": 0.7768, "step": 21701 }, { "epoch": 0.7665537840689834, "grad_norm": 1.5182287693023682, "learning_rate": 1.3623540807960656e-06, "loss": 0.7896, "step": 21702 }, { "epoch": 0.7665891058726914, "grad_norm": 1.5413057804107666, "learning_rate": 1.3619616619314967e-06, "loss": 0.7439, "step": 21703 }, { "epoch": 0.7666244276763993, "grad_norm": 2.050353765487671, "learning_rate": 1.3615692906808915e-06, "loss": 0.7822, "step": 21704 }, { "epoch": 0.7666597494801072, "grad_norm": 0.9279449582099915, "learning_rate": 1.3611769670493818e-06, "loss": 0.566, "step": 21705 }, { "epoch": 0.7666950712838151, "grad_norm": 1.8278656005859375, "learning_rate": 1.3607846910421035e-06, "loss": 0.77, "step": 21706 }, { "epoch": 0.766730393087523, "grad_norm": 1.708642840385437, "learning_rate": 1.3603924626641934e-06, "loss": 0.7688, "step": 21707 }, { "epoch": 0.7667657148912309, "grad_norm": 1.7228692770004272, "learning_rate": 1.3600002819207809e-06, "loss": 0.729, "step": 21708 }, { "epoch": 0.7668010366949388, "grad_norm": 1.6864986419677734, "learning_rate": 1.3596081488170016e-06, "loss": 0.7756, "step": 21709 }, { "epoch": 0.7668363584986467, "grad_norm": 1.591833472251892, "learning_rate": 1.3592160633579866e-06, "loss": 0.7804, "step": 21710 }, { "epoch": 0.7668716803023546, "grad_norm": 1.6283254623413086, "learning_rate": 1.3588240255488662e-06, "loss": 0.7582, "step": 21711 }, { "epoch": 0.7669070021060626, "grad_norm": 1.6493144035339355, "learning_rate": 1.3584320353947722e-06, "loss": 0.7614, "step": 21712 }, { "epoch": 0.7669423239097705, "grad_norm": 1.820668339729309, "learning_rate": 1.3580400929008347e-06, "loss": 0.7438, "step": 21713 }, { "epoch": 0.7669776457134784, "grad_norm": 1.864109992980957, "learning_rate": 1.3576481980721861e-06, "loss": 0.7725, "step": 21714 }, { "epoch": 0.7670129675171863, "grad_norm": 1.6672263145446777, "learning_rate": 1.3572563509139513e-06, "loss": 0.7659, "step": 21715 }, { "epoch": 0.7670482893208942, "grad_norm": 1.9707008600234985, "learning_rate": 1.3568645514312612e-06, "loss": 0.7604, "step": 21716 }, { "epoch": 0.7670836111246021, "grad_norm": 1.7376124858856201, "learning_rate": 1.3564727996292438e-06, "loss": 0.8061, "step": 21717 }, { "epoch": 0.76711893292831, "grad_norm": 1.614759922027588, "learning_rate": 1.3560810955130243e-06, "loss": 0.7538, "step": 21718 }, { "epoch": 0.7671542547320179, "grad_norm": 1.6554100513458252, "learning_rate": 1.3556894390877307e-06, "loss": 0.7364, "step": 21719 }, { "epoch": 0.7671895765357258, "grad_norm": 1.683359980583191, "learning_rate": 1.3552978303584897e-06, "loss": 0.7461, "step": 21720 }, { "epoch": 0.7672248983394337, "grad_norm": 1.7423588037490845, "learning_rate": 1.3549062693304243e-06, "loss": 0.7749, "step": 21721 }, { "epoch": 0.7672602201431417, "grad_norm": 1.6443992853164673, "learning_rate": 1.3545147560086601e-06, "loss": 0.7295, "step": 21722 }, { "epoch": 0.7672955419468496, "grad_norm": 1.7761210203170776, "learning_rate": 1.3541232903983237e-06, "loss": 0.7855, "step": 21723 }, { "epoch": 0.7673308637505574, "grad_norm": 1.8392547369003296, "learning_rate": 1.353731872504534e-06, "loss": 0.7415, "step": 21724 }, { "epoch": 0.7673661855542653, "grad_norm": 1.5784270763397217, "learning_rate": 1.353340502332417e-06, "loss": 0.7545, "step": 21725 }, { "epoch": 0.7674015073579732, "grad_norm": 2.1715753078460693, "learning_rate": 1.3529491798870946e-06, "loss": 0.7918, "step": 21726 }, { "epoch": 0.7674368291616811, "grad_norm": 1.585695505142212, "learning_rate": 1.352557905173687e-06, "loss": 0.7606, "step": 21727 }, { "epoch": 0.767472150965389, "grad_norm": 1.7562028169631958, "learning_rate": 1.352166678197317e-06, "loss": 0.7576, "step": 21728 }, { "epoch": 0.7675074727690969, "grad_norm": 1.86366868019104, "learning_rate": 1.3517754989631027e-06, "loss": 0.7846, "step": 21729 }, { "epoch": 0.7675427945728048, "grad_norm": 1.7854708433151245, "learning_rate": 1.3513843674761662e-06, "loss": 0.7436, "step": 21730 }, { "epoch": 0.7675781163765127, "grad_norm": 1.6780003309249878, "learning_rate": 1.3509932837416235e-06, "loss": 0.7646, "step": 21731 }, { "epoch": 0.7676134381802207, "grad_norm": 1.9062793254852295, "learning_rate": 1.3506022477645947e-06, "loss": 0.77, "step": 21732 }, { "epoch": 0.7676487599839286, "grad_norm": 1.7998319864273071, "learning_rate": 1.3502112595501992e-06, "loss": 0.7901, "step": 21733 }, { "epoch": 0.7676840817876365, "grad_norm": 1.9830845594406128, "learning_rate": 1.3498203191035514e-06, "loss": 0.7477, "step": 21734 }, { "epoch": 0.7677194035913444, "grad_norm": 2.2008960247039795, "learning_rate": 1.3494294264297692e-06, "loss": 0.759, "step": 21735 }, { "epoch": 0.7677547253950523, "grad_norm": 1.6270933151245117, "learning_rate": 1.3490385815339696e-06, "loss": 0.7765, "step": 21736 }, { "epoch": 0.7677900471987602, "grad_norm": 1.7282735109329224, "learning_rate": 1.3486477844212653e-06, "loss": 0.7342, "step": 21737 }, { "epoch": 0.7678253690024681, "grad_norm": 1.5939973592758179, "learning_rate": 1.3482570350967728e-06, "loss": 0.7518, "step": 21738 }, { "epoch": 0.767860690806176, "grad_norm": 1.5638928413391113, "learning_rate": 1.3478663335656067e-06, "loss": 0.7544, "step": 21739 }, { "epoch": 0.7678960126098839, "grad_norm": 1.7327097654342651, "learning_rate": 1.3474756798328786e-06, "loss": 0.7783, "step": 21740 }, { "epoch": 0.7679313344135918, "grad_norm": 1.9815725088119507, "learning_rate": 1.3470850739037017e-06, "loss": 0.7218, "step": 21741 }, { "epoch": 0.7679666562172998, "grad_norm": 2.975797176361084, "learning_rate": 1.3466945157831907e-06, "loss": 0.7549, "step": 21742 }, { "epoch": 0.7680019780210077, "grad_norm": 1.8221867084503174, "learning_rate": 1.3463040054764532e-06, "loss": 0.7745, "step": 21743 }, { "epoch": 0.7680372998247156, "grad_norm": 1.885923981666565, "learning_rate": 1.3459135429886021e-06, "loss": 0.7692, "step": 21744 }, { "epoch": 0.7680726216284235, "grad_norm": 1.7384318113327026, "learning_rate": 1.345523128324749e-06, "loss": 0.7956, "step": 21745 }, { "epoch": 0.7681079434321314, "grad_norm": 1.8245242834091187, "learning_rate": 1.3451327614900028e-06, "loss": 0.7556, "step": 21746 }, { "epoch": 0.7681432652358393, "grad_norm": 1.5801889896392822, "learning_rate": 1.34474244248947e-06, "loss": 0.742, "step": 21747 }, { "epoch": 0.7681785870395472, "grad_norm": 1.6002426147460938, "learning_rate": 1.3443521713282609e-06, "loss": 0.7421, "step": 21748 }, { "epoch": 0.7682139088432551, "grad_norm": 1.7152408361434937, "learning_rate": 1.3439619480114851e-06, "loss": 0.779, "step": 21749 }, { "epoch": 0.7682492306469629, "grad_norm": 2.163501024246216, "learning_rate": 1.3435717725442464e-06, "loss": 0.7801, "step": 21750 }, { "epoch": 0.7682845524506708, "grad_norm": 1.7728807926177979, "learning_rate": 1.3431816449316537e-06, "loss": 0.7733, "step": 21751 }, { "epoch": 0.7683198742543788, "grad_norm": 1.7036408185958862, "learning_rate": 1.3427915651788132e-06, "loss": 0.7872, "step": 21752 }, { "epoch": 0.7683551960580867, "grad_norm": 1.746397614479065, "learning_rate": 1.3424015332908275e-06, "loss": 0.7875, "step": 21753 }, { "epoch": 0.7683905178617946, "grad_norm": 1.7602022886276245, "learning_rate": 1.3420115492728032e-06, "loss": 0.7816, "step": 21754 }, { "epoch": 0.7684258396655025, "grad_norm": 1.8789857625961304, "learning_rate": 1.3416216131298454e-06, "loss": 0.7726, "step": 21755 }, { "epoch": 0.7684611614692104, "grad_norm": 1.7170902490615845, "learning_rate": 1.3412317248670554e-06, "loss": 0.7431, "step": 21756 }, { "epoch": 0.7684964832729183, "grad_norm": 1.5952372550964355, "learning_rate": 1.340841884489536e-06, "loss": 0.8083, "step": 21757 }, { "epoch": 0.7685318050766262, "grad_norm": 1.508661150932312, "learning_rate": 1.3404520920023922e-06, "loss": 0.7669, "step": 21758 }, { "epoch": 0.7685671268803341, "grad_norm": 1.6604466438293457, "learning_rate": 1.3400623474107216e-06, "loss": 0.7766, "step": 21759 }, { "epoch": 0.768602448684042, "grad_norm": 1.8545646667480469, "learning_rate": 1.3396726507196273e-06, "loss": 0.7727, "step": 21760 }, { "epoch": 0.76863777048775, "grad_norm": 1.8420023918151855, "learning_rate": 1.3392830019342107e-06, "loss": 0.82, "step": 21761 }, { "epoch": 0.7686730922914579, "grad_norm": 1.6954522132873535, "learning_rate": 1.338893401059569e-06, "loss": 0.8002, "step": 21762 }, { "epoch": 0.7687084140951658, "grad_norm": 1.7085893154144287, "learning_rate": 1.3385038481008017e-06, "loss": 0.7559, "step": 21763 }, { "epoch": 0.7687437358988737, "grad_norm": 1.7740041017532349, "learning_rate": 1.3381143430630078e-06, "loss": 0.746, "step": 21764 }, { "epoch": 0.7687790577025816, "grad_norm": 1.760981798171997, "learning_rate": 1.3377248859512882e-06, "loss": 0.7747, "step": 21765 }, { "epoch": 0.7688143795062895, "grad_norm": 1.7826639413833618, "learning_rate": 1.3373354767707335e-06, "loss": 0.7704, "step": 21766 }, { "epoch": 0.7688497013099974, "grad_norm": 2.0312678813934326, "learning_rate": 1.3369461155264434e-06, "loss": 0.7738, "step": 21767 }, { "epoch": 0.7688850231137053, "grad_norm": 1.5281994342803955, "learning_rate": 1.336556802223516e-06, "loss": 0.7351, "step": 21768 }, { "epoch": 0.7689203449174132, "grad_norm": 1.7748280763626099, "learning_rate": 1.336167536867043e-06, "loss": 0.7738, "step": 21769 }, { "epoch": 0.7689556667211211, "grad_norm": 2.1044671535491943, "learning_rate": 1.3357783194621204e-06, "loss": 0.746, "step": 21770 }, { "epoch": 0.7689909885248291, "grad_norm": 1.625185489654541, "learning_rate": 1.3353891500138439e-06, "loss": 0.7917, "step": 21771 }, { "epoch": 0.769026310328537, "grad_norm": 1.5693062543869019, "learning_rate": 1.335000028527304e-06, "loss": 0.7801, "step": 21772 }, { "epoch": 0.7690616321322449, "grad_norm": 1.8011564016342163, "learning_rate": 1.3346109550075948e-06, "loss": 0.7957, "step": 21773 }, { "epoch": 0.7690969539359528, "grad_norm": 1.74485445022583, "learning_rate": 1.334221929459808e-06, "loss": 0.7926, "step": 21774 }, { "epoch": 0.7691322757396607, "grad_norm": 1.6636825799942017, "learning_rate": 1.333832951889037e-06, "loss": 0.757, "step": 21775 }, { "epoch": 0.7691675975433685, "grad_norm": 1.6349273920059204, "learning_rate": 1.3334440223003698e-06, "loss": 0.7641, "step": 21776 }, { "epoch": 0.7692029193470764, "grad_norm": 1.6361647844314575, "learning_rate": 1.3330551406988984e-06, "loss": 0.7524, "step": 21777 }, { "epoch": 0.7692382411507843, "grad_norm": 1.777213454246521, "learning_rate": 1.3326663070897127e-06, "loss": 0.7635, "step": 21778 }, { "epoch": 0.7692735629544922, "grad_norm": 1.7711067199707031, "learning_rate": 1.3322775214779005e-06, "loss": 0.8023, "step": 21779 }, { "epoch": 0.7693088847582001, "grad_norm": 1.6965405941009521, "learning_rate": 1.33188878386855e-06, "loss": 0.7954, "step": 21780 }, { "epoch": 0.769344206561908, "grad_norm": 1.5932987928390503, "learning_rate": 1.331500094266751e-06, "loss": 0.7335, "step": 21781 }, { "epoch": 0.769379528365616, "grad_norm": 2.334082841873169, "learning_rate": 1.3311114526775881e-06, "loss": 0.7776, "step": 21782 }, { "epoch": 0.7694148501693239, "grad_norm": 0.8696109652519226, "learning_rate": 1.33072285910615e-06, "loss": 0.5401, "step": 21783 }, { "epoch": 0.7694501719730318, "grad_norm": 1.6523966789245605, "learning_rate": 1.3303343135575197e-06, "loss": 0.7571, "step": 21784 }, { "epoch": 0.7694854937767397, "grad_norm": 1.9112019538879395, "learning_rate": 1.3299458160367861e-06, "loss": 0.7696, "step": 21785 }, { "epoch": 0.7695208155804476, "grad_norm": 2.007373571395874, "learning_rate": 1.3295573665490297e-06, "loss": 0.7618, "step": 21786 }, { "epoch": 0.7695561373841555, "grad_norm": 1.6599609851837158, "learning_rate": 1.3291689650993372e-06, "loss": 0.771, "step": 21787 }, { "epoch": 0.7695914591878634, "grad_norm": 1.6309374570846558, "learning_rate": 1.328780611692792e-06, "loss": 0.7517, "step": 21788 }, { "epoch": 0.7696267809915713, "grad_norm": 1.6585825681686401, "learning_rate": 1.328392306334475e-06, "loss": 0.7798, "step": 21789 }, { "epoch": 0.7696621027952792, "grad_norm": 1.9013299942016602, "learning_rate": 1.3280040490294693e-06, "loss": 0.7552, "step": 21790 }, { "epoch": 0.7696974245989872, "grad_norm": 2.0217971801757812, "learning_rate": 1.3276158397828577e-06, "loss": 0.7798, "step": 21791 }, { "epoch": 0.7697327464026951, "grad_norm": 1.552456259727478, "learning_rate": 1.327227678599718e-06, "loss": 0.7371, "step": 21792 }, { "epoch": 0.769768068206403, "grad_norm": 1.5590624809265137, "learning_rate": 1.3268395654851324e-06, "loss": 0.7821, "step": 21793 }, { "epoch": 0.7698033900101109, "grad_norm": 1.6691073179244995, "learning_rate": 1.3264515004441813e-06, "loss": 0.7787, "step": 21794 }, { "epoch": 0.7698387118138188, "grad_norm": 1.841822862625122, "learning_rate": 1.3260634834819413e-06, "loss": 0.7712, "step": 21795 }, { "epoch": 0.7698740336175267, "grad_norm": 1.5836105346679688, "learning_rate": 1.3256755146034917e-06, "loss": 0.7537, "step": 21796 }, { "epoch": 0.7699093554212346, "grad_norm": 1.5815367698669434, "learning_rate": 1.3252875938139115e-06, "loss": 0.7449, "step": 21797 }, { "epoch": 0.7699446772249425, "grad_norm": 1.7118967771530151, "learning_rate": 1.3248997211182752e-06, "loss": 0.7567, "step": 21798 }, { "epoch": 0.7699799990286504, "grad_norm": 1.7791168689727783, "learning_rate": 1.3245118965216602e-06, "loss": 0.7605, "step": 21799 }, { "epoch": 0.7700153208323584, "grad_norm": 1.6396220922470093, "learning_rate": 1.3241241200291443e-06, "loss": 0.7781, "step": 21800 }, { "epoch": 0.7700506426360663, "grad_norm": 1.9023460149765015, "learning_rate": 1.3237363916457996e-06, "loss": 0.7655, "step": 21801 }, { "epoch": 0.7700859644397741, "grad_norm": 1.9351726770401, "learning_rate": 1.323348711376703e-06, "loss": 0.7893, "step": 21802 }, { "epoch": 0.770121286243482, "grad_norm": 1.7147001028060913, "learning_rate": 1.3229610792269259e-06, "loss": 0.752, "step": 21803 }, { "epoch": 0.7701566080471899, "grad_norm": 1.6138290166854858, "learning_rate": 1.322573495201545e-06, "loss": 0.7453, "step": 21804 }, { "epoch": 0.7701919298508978, "grad_norm": 1.748423457145691, "learning_rate": 1.3221859593056292e-06, "loss": 0.769, "step": 21805 }, { "epoch": 0.7702272516546057, "grad_norm": 1.7276467084884644, "learning_rate": 1.3217984715442522e-06, "loss": 0.7786, "step": 21806 }, { "epoch": 0.7702625734583136, "grad_norm": 2.044947862625122, "learning_rate": 1.3214110319224865e-06, "loss": 0.762, "step": 21807 }, { "epoch": 0.7702978952620215, "grad_norm": 1.65165114402771, "learning_rate": 1.3210236404454008e-06, "loss": 0.7538, "step": 21808 }, { "epoch": 0.7703332170657294, "grad_norm": 1.6517223119735718, "learning_rate": 1.3206362971180658e-06, "loss": 0.7579, "step": 21809 }, { "epoch": 0.7703685388694373, "grad_norm": 1.6090445518493652, "learning_rate": 1.3202490019455527e-06, "loss": 0.7229, "step": 21810 }, { "epoch": 0.7704038606731453, "grad_norm": 1.5997259616851807, "learning_rate": 1.3198617549329274e-06, "loss": 0.7438, "step": 21811 }, { "epoch": 0.7704391824768532, "grad_norm": 1.7593541145324707, "learning_rate": 1.31947455608526e-06, "loss": 0.7841, "step": 21812 }, { "epoch": 0.7704745042805611, "grad_norm": 1.8308112621307373, "learning_rate": 1.319087405407619e-06, "loss": 0.7566, "step": 21813 }, { "epoch": 0.770509826084269, "grad_norm": 2.3767809867858887, "learning_rate": 1.3187003029050687e-06, "loss": 0.7923, "step": 21814 }, { "epoch": 0.7705451478879769, "grad_norm": 1.7340165376663208, "learning_rate": 1.3183132485826766e-06, "loss": 0.7726, "step": 21815 }, { "epoch": 0.7705804696916848, "grad_norm": 1.665442705154419, "learning_rate": 1.3179262424455108e-06, "loss": 0.766, "step": 21816 }, { "epoch": 0.7706157914953927, "grad_norm": 1.7012276649475098, "learning_rate": 1.3175392844986319e-06, "loss": 0.7594, "step": 21817 }, { "epoch": 0.7706511132991006, "grad_norm": 1.684710144996643, "learning_rate": 1.317152374747107e-06, "loss": 0.7677, "step": 21818 }, { "epoch": 0.7706864351028085, "grad_norm": 1.7415192127227783, "learning_rate": 1.316765513196001e-06, "loss": 0.761, "step": 21819 }, { "epoch": 0.7707217569065165, "grad_norm": 1.6299432516098022, "learning_rate": 1.3163786998503753e-06, "loss": 0.7634, "step": 21820 }, { "epoch": 0.7707570787102244, "grad_norm": 1.8062207698822021, "learning_rate": 1.3159919347152915e-06, "loss": 0.8058, "step": 21821 }, { "epoch": 0.7707924005139323, "grad_norm": 1.576102614402771, "learning_rate": 1.3156052177958128e-06, "loss": 0.7171, "step": 21822 }, { "epoch": 0.7708277223176402, "grad_norm": 1.6799769401550293, "learning_rate": 1.3152185490970022e-06, "loss": 0.7403, "step": 21823 }, { "epoch": 0.7708630441213481, "grad_norm": 1.8952478170394897, "learning_rate": 1.3148319286239163e-06, "loss": 0.768, "step": 21824 }, { "epoch": 0.770898365925056, "grad_norm": 1.6448029279708862, "learning_rate": 1.3144453563816178e-06, "loss": 0.813, "step": 21825 }, { "epoch": 0.7709336877287639, "grad_norm": 1.6259878873825073, "learning_rate": 1.3140588323751674e-06, "loss": 0.803, "step": 21826 }, { "epoch": 0.7709690095324718, "grad_norm": 1.624173641204834, "learning_rate": 1.3136723566096205e-06, "loss": 0.7559, "step": 21827 }, { "epoch": 0.7710043313361796, "grad_norm": 1.6667100191116333, "learning_rate": 1.3132859290900363e-06, "loss": 0.799, "step": 21828 }, { "epoch": 0.7710396531398875, "grad_norm": 1.7392991781234741, "learning_rate": 1.3128995498214747e-06, "loss": 0.7855, "step": 21829 }, { "epoch": 0.7710749749435954, "grad_norm": 1.7983626127243042, "learning_rate": 1.3125132188089895e-06, "loss": 0.8164, "step": 21830 }, { "epoch": 0.7711102967473034, "grad_norm": 1.6256674528121948, "learning_rate": 1.3121269360576378e-06, "loss": 0.7468, "step": 21831 }, { "epoch": 0.7711456185510113, "grad_norm": 2.3968217372894287, "learning_rate": 1.3117407015724753e-06, "loss": 0.7682, "step": 21832 }, { "epoch": 0.7711809403547192, "grad_norm": 1.7051703929901123, "learning_rate": 1.3113545153585593e-06, "loss": 0.7687, "step": 21833 }, { "epoch": 0.7712162621584271, "grad_norm": 1.6573209762573242, "learning_rate": 1.3109683774209402e-06, "loss": 0.7739, "step": 21834 }, { "epoch": 0.771251583962135, "grad_norm": 1.8040707111358643, "learning_rate": 1.3105822877646734e-06, "loss": 0.7876, "step": 21835 }, { "epoch": 0.7712869057658429, "grad_norm": 1.6018528938293457, "learning_rate": 1.3101962463948136e-06, "loss": 0.7737, "step": 21836 }, { "epoch": 0.7713222275695508, "grad_norm": 1.57651948928833, "learning_rate": 1.3098102533164104e-06, "loss": 0.7681, "step": 21837 }, { "epoch": 0.7713575493732587, "grad_norm": 1.6351137161254883, "learning_rate": 1.3094243085345172e-06, "loss": 0.7842, "step": 21838 }, { "epoch": 0.7713928711769666, "grad_norm": 1.841516375541687, "learning_rate": 1.3090384120541871e-06, "loss": 0.7615, "step": 21839 }, { "epoch": 0.7714281929806746, "grad_norm": 1.716493844985962, "learning_rate": 1.308652563880466e-06, "loss": 0.7731, "step": 21840 }, { "epoch": 0.7714635147843825, "grad_norm": 1.7194225788116455, "learning_rate": 1.308266764018406e-06, "loss": 0.7673, "step": 21841 }, { "epoch": 0.7714988365880904, "grad_norm": 1.8586581945419312, "learning_rate": 1.3078810124730568e-06, "loss": 0.716, "step": 21842 }, { "epoch": 0.7715341583917983, "grad_norm": 1.6913982629776, "learning_rate": 1.307495309249468e-06, "loss": 0.7785, "step": 21843 }, { "epoch": 0.7715694801955062, "grad_norm": 1.5796022415161133, "learning_rate": 1.3071096543526857e-06, "loss": 0.7803, "step": 21844 }, { "epoch": 0.7716048019992141, "grad_norm": 1.4030863046646118, "learning_rate": 1.3067240477877574e-06, "loss": 0.7387, "step": 21845 }, { "epoch": 0.771640123802922, "grad_norm": 1.5844271183013916, "learning_rate": 1.306338489559732e-06, "loss": 0.7548, "step": 21846 }, { "epoch": 0.7716754456066299, "grad_norm": 1.6163524389266968, "learning_rate": 1.305952979673653e-06, "loss": 0.7945, "step": 21847 }, { "epoch": 0.7717107674103378, "grad_norm": 1.750006079673767, "learning_rate": 1.3055675181345668e-06, "loss": 0.7854, "step": 21848 }, { "epoch": 0.7717460892140457, "grad_norm": 1.796478271484375, "learning_rate": 1.30518210494752e-06, "loss": 0.8027, "step": 21849 }, { "epoch": 0.7717814110177537, "grad_norm": 1.5735770463943481, "learning_rate": 1.3047967401175538e-06, "loss": 0.7726, "step": 21850 }, { "epoch": 0.7718167328214616, "grad_norm": 1.6007845401763916, "learning_rate": 1.304411423649713e-06, "loss": 0.7734, "step": 21851 }, { "epoch": 0.7718520546251695, "grad_norm": 1.7230697870254517, "learning_rate": 1.3040261555490423e-06, "loss": 0.7797, "step": 21852 }, { "epoch": 0.7718873764288774, "grad_norm": 1.4900761842727661, "learning_rate": 1.3036409358205814e-06, "loss": 0.769, "step": 21853 }, { "epoch": 0.7719226982325852, "grad_norm": 2.67698335647583, "learning_rate": 1.3032557644693733e-06, "loss": 0.8044, "step": 21854 }, { "epoch": 0.7719580200362931, "grad_norm": 1.6778943538665771, "learning_rate": 1.3028706415004594e-06, "loss": 0.771, "step": 21855 }, { "epoch": 0.771993341840001, "grad_norm": 1.6665650606155396, "learning_rate": 1.302485566918879e-06, "loss": 0.7803, "step": 21856 }, { "epoch": 0.7720286636437089, "grad_norm": 1.690981388092041, "learning_rate": 1.3021005407296732e-06, "loss": 0.788, "step": 21857 }, { "epoch": 0.7720639854474168, "grad_norm": 1.7996803522109985, "learning_rate": 1.3017155629378792e-06, "loss": 0.7948, "step": 21858 }, { "epoch": 0.7720993072511247, "grad_norm": 1.5843405723571777, "learning_rate": 1.301330633548538e-06, "loss": 0.7737, "step": 21859 }, { "epoch": 0.7721346290548327, "grad_norm": 1.6632314920425415, "learning_rate": 1.3009457525666852e-06, "loss": 0.7641, "step": 21860 }, { "epoch": 0.7721699508585406, "grad_norm": 2.8553922176361084, "learning_rate": 1.300560919997359e-06, "loss": 0.7913, "step": 21861 }, { "epoch": 0.7722052726622485, "grad_norm": 1.7310996055603027, "learning_rate": 1.3001761358455972e-06, "loss": 0.7718, "step": 21862 }, { "epoch": 0.7722405944659564, "grad_norm": 1.576851725578308, "learning_rate": 1.2997914001164335e-06, "loss": 0.7767, "step": 21863 }, { "epoch": 0.7722759162696643, "grad_norm": 1.5911885499954224, "learning_rate": 1.2994067128149041e-06, "loss": 0.7859, "step": 21864 }, { "epoch": 0.7723112380733722, "grad_norm": 2.035660982131958, "learning_rate": 1.2990220739460453e-06, "loss": 0.7982, "step": 21865 }, { "epoch": 0.7723465598770801, "grad_norm": 1.8478893041610718, "learning_rate": 1.298637483514889e-06, "loss": 0.746, "step": 21866 }, { "epoch": 0.772381881680788, "grad_norm": 1.6720645427703857, "learning_rate": 1.2982529415264689e-06, "loss": 0.7497, "step": 21867 }, { "epoch": 0.7724172034844959, "grad_norm": 1.6738239526748657, "learning_rate": 1.2978684479858207e-06, "loss": 0.7784, "step": 21868 }, { "epoch": 0.7724525252882039, "grad_norm": 2.0135395526885986, "learning_rate": 1.2974840028979719e-06, "loss": 0.7553, "step": 21869 }, { "epoch": 0.7724878470919118, "grad_norm": 1.7353793382644653, "learning_rate": 1.2970996062679568e-06, "loss": 0.7961, "step": 21870 }, { "epoch": 0.7725231688956197, "grad_norm": 1.8144690990447998, "learning_rate": 1.296715258100808e-06, "loss": 0.7746, "step": 21871 }, { "epoch": 0.7725584906993276, "grad_norm": 1.6059156656265259, "learning_rate": 1.2963309584015515e-06, "loss": 0.7591, "step": 21872 }, { "epoch": 0.7725938125030355, "grad_norm": 1.7005528211593628, "learning_rate": 1.2959467071752196e-06, "loss": 0.7863, "step": 21873 }, { "epoch": 0.7726291343067434, "grad_norm": 1.6187269687652588, "learning_rate": 1.2955625044268426e-06, "loss": 0.7659, "step": 21874 }, { "epoch": 0.7726644561104513, "grad_norm": 2.0450403690338135, "learning_rate": 1.2951783501614457e-06, "loss": 0.7949, "step": 21875 }, { "epoch": 0.7726997779141592, "grad_norm": 1.870343804359436, "learning_rate": 1.2947942443840595e-06, "loss": 0.8186, "step": 21876 }, { "epoch": 0.7727350997178671, "grad_norm": 1.5970065593719482, "learning_rate": 1.2944101870997077e-06, "loss": 0.7451, "step": 21877 }, { "epoch": 0.772770421521575, "grad_norm": 1.7432105541229248, "learning_rate": 1.2940261783134211e-06, "loss": 0.7754, "step": 21878 }, { "epoch": 0.772805743325283, "grad_norm": 1.6541357040405273, "learning_rate": 1.2936422180302211e-06, "loss": 0.7602, "step": 21879 }, { "epoch": 0.7728410651289909, "grad_norm": 1.6558318138122559, "learning_rate": 1.2932583062551351e-06, "loss": 0.7818, "step": 21880 }, { "epoch": 0.7728763869326987, "grad_norm": 1.82073175907135, "learning_rate": 1.2928744429931895e-06, "loss": 0.6769, "step": 21881 }, { "epoch": 0.7729117087364066, "grad_norm": 1.9011002779006958, "learning_rate": 1.2924906282494048e-06, "loss": 0.7696, "step": 21882 }, { "epoch": 0.7729470305401145, "grad_norm": 1.8752341270446777, "learning_rate": 1.2921068620288057e-06, "loss": 0.7672, "step": 21883 }, { "epoch": 0.7729823523438224, "grad_norm": 1.8129355907440186, "learning_rate": 1.2917231443364164e-06, "loss": 0.8101, "step": 21884 }, { "epoch": 0.7730176741475303, "grad_norm": 1.8103699684143066, "learning_rate": 1.2913394751772556e-06, "loss": 0.7676, "step": 21885 }, { "epoch": 0.7730529959512382, "grad_norm": 1.6742255687713623, "learning_rate": 1.2909558545563473e-06, "loss": 0.7286, "step": 21886 }, { "epoch": 0.7730883177549461, "grad_norm": 1.6237883567810059, "learning_rate": 1.2905722824787125e-06, "loss": 0.7827, "step": 21887 }, { "epoch": 0.773123639558654, "grad_norm": 1.8492518663406372, "learning_rate": 1.2901887589493694e-06, "loss": 0.7631, "step": 21888 }, { "epoch": 0.773158961362362, "grad_norm": 1.504293441772461, "learning_rate": 1.2898052839733382e-06, "loss": 0.7384, "step": 21889 }, { "epoch": 0.7731942831660699, "grad_norm": 1.583331823348999, "learning_rate": 1.2894218575556382e-06, "loss": 0.7591, "step": 21890 }, { "epoch": 0.7732296049697778, "grad_norm": 1.534416913986206, "learning_rate": 1.2890384797012884e-06, "loss": 0.7899, "step": 21891 }, { "epoch": 0.7732649267734857, "grad_norm": 1.6556243896484375, "learning_rate": 1.2886551504153045e-06, "loss": 0.7305, "step": 21892 }, { "epoch": 0.7733002485771936, "grad_norm": 2.0290513038635254, "learning_rate": 1.2882718697027042e-06, "loss": 0.7235, "step": 21893 }, { "epoch": 0.7733355703809015, "grad_norm": 1.7687174081802368, "learning_rate": 1.2878886375685069e-06, "loss": 0.7726, "step": 21894 }, { "epoch": 0.7733708921846094, "grad_norm": 1.7479182481765747, "learning_rate": 1.2875054540177217e-06, "loss": 0.7967, "step": 21895 }, { "epoch": 0.7734062139883173, "grad_norm": 2.0285604000091553, "learning_rate": 1.2871223190553677e-06, "loss": 0.7262, "step": 21896 }, { "epoch": 0.7734415357920252, "grad_norm": 1.5757843255996704, "learning_rate": 1.2867392326864608e-06, "loss": 0.7805, "step": 21897 }, { "epoch": 0.7734768575957331, "grad_norm": 1.6488620042800903, "learning_rate": 1.2863561949160109e-06, "loss": 0.7747, "step": 21898 }, { "epoch": 0.7735121793994411, "grad_norm": 1.519535779953003, "learning_rate": 1.2859732057490327e-06, "loss": 0.7572, "step": 21899 }, { "epoch": 0.773547501203149, "grad_norm": 1.6836366653442383, "learning_rate": 1.2855902651905394e-06, "loss": 0.7846, "step": 21900 }, { "epoch": 0.7735828230068569, "grad_norm": 1.729151725769043, "learning_rate": 1.285207373245544e-06, "loss": 0.7357, "step": 21901 }, { "epoch": 0.7736181448105648, "grad_norm": 1.8074332475662231, "learning_rate": 1.2848245299190548e-06, "loss": 0.7785, "step": 21902 }, { "epoch": 0.7736534666142727, "grad_norm": 1.7725555896759033, "learning_rate": 1.2844417352160836e-06, "loss": 0.7976, "step": 21903 }, { "epoch": 0.7736887884179806, "grad_norm": 1.7307713031768799, "learning_rate": 1.2840589891416422e-06, "loss": 0.7933, "step": 21904 }, { "epoch": 0.7737241102216885, "grad_norm": 1.6570625305175781, "learning_rate": 1.2836762917007361e-06, "loss": 0.812, "step": 21905 }, { "epoch": 0.7737594320253964, "grad_norm": 1.8412201404571533, "learning_rate": 1.2832936428983766e-06, "loss": 0.7556, "step": 21906 }, { "epoch": 0.7737947538291042, "grad_norm": 1.848620057106018, "learning_rate": 1.282911042739573e-06, "loss": 0.739, "step": 21907 }, { "epoch": 0.7738300756328121, "grad_norm": 1.5859570503234863, "learning_rate": 1.282528491229329e-06, "loss": 0.7652, "step": 21908 }, { "epoch": 0.77386539743652, "grad_norm": 1.6368637084960938, "learning_rate": 1.2821459883726533e-06, "loss": 0.7719, "step": 21909 }, { "epoch": 0.773900719240228, "grad_norm": 1.6190664768218994, "learning_rate": 1.2817635341745538e-06, "loss": 0.7663, "step": 21910 }, { "epoch": 0.7739360410439359, "grad_norm": 1.951138973236084, "learning_rate": 1.2813811286400324e-06, "loss": 0.7899, "step": 21911 }, { "epoch": 0.7739713628476438, "grad_norm": 2.0454883575439453, "learning_rate": 1.2809987717740962e-06, "loss": 0.7483, "step": 21912 }, { "epoch": 0.7740066846513517, "grad_norm": 2.2439353466033936, "learning_rate": 1.2806164635817498e-06, "loss": 0.7289, "step": 21913 }, { "epoch": 0.7740420064550596, "grad_norm": 1.5898553133010864, "learning_rate": 1.2802342040679967e-06, "loss": 0.7619, "step": 21914 }, { "epoch": 0.7740773282587675, "grad_norm": 1.6743230819702148, "learning_rate": 1.2798519932378374e-06, "loss": 0.7609, "step": 21915 }, { "epoch": 0.7741126500624754, "grad_norm": 1.581792950630188, "learning_rate": 1.2794698310962756e-06, "loss": 0.7832, "step": 21916 }, { "epoch": 0.7741479718661833, "grad_norm": 1.672109603881836, "learning_rate": 1.2790877176483145e-06, "loss": 0.7831, "step": 21917 }, { "epoch": 0.7741832936698912, "grad_norm": 1.6625059843063354, "learning_rate": 1.2787056528989534e-06, "loss": 0.7933, "step": 21918 }, { "epoch": 0.7742186154735992, "grad_norm": 1.600350260734558, "learning_rate": 1.2783236368531927e-06, "loss": 0.7565, "step": 21919 }, { "epoch": 0.7742539372773071, "grad_norm": 1.7036371231079102, "learning_rate": 1.2779416695160336e-06, "loss": 0.7577, "step": 21920 }, { "epoch": 0.774289259081015, "grad_norm": 1.6384772062301636, "learning_rate": 1.277559750892473e-06, "loss": 0.7464, "step": 21921 }, { "epoch": 0.7743245808847229, "grad_norm": 1.5884760618209839, "learning_rate": 1.2771778809875113e-06, "loss": 0.7861, "step": 21922 }, { "epoch": 0.7743599026884308, "grad_norm": 1.619726300239563, "learning_rate": 1.2767960598061469e-06, "loss": 0.7413, "step": 21923 }, { "epoch": 0.7743952244921387, "grad_norm": 1.9186464548110962, "learning_rate": 1.2764142873533735e-06, "loss": 0.8097, "step": 21924 }, { "epoch": 0.7744305462958466, "grad_norm": 1.7456172704696655, "learning_rate": 1.2760325636341907e-06, "loss": 0.7485, "step": 21925 }, { "epoch": 0.7744658680995545, "grad_norm": 1.7498042583465576, "learning_rate": 1.275650888653595e-06, "loss": 0.7984, "step": 21926 }, { "epoch": 0.7745011899032624, "grad_norm": 1.6289622783660889, "learning_rate": 1.275269262416579e-06, "loss": 0.7795, "step": 21927 }, { "epoch": 0.7745365117069704, "grad_norm": 1.9118117094039917, "learning_rate": 1.2748876849281388e-06, "loss": 0.7756, "step": 21928 }, { "epoch": 0.7745718335106783, "grad_norm": 1.6393777132034302, "learning_rate": 1.2745061561932698e-06, "loss": 0.7517, "step": 21929 }, { "epoch": 0.7746071553143862, "grad_norm": 1.608676552772522, "learning_rate": 1.2741246762169624e-06, "loss": 0.755, "step": 21930 }, { "epoch": 0.7746424771180941, "grad_norm": 1.693949580192566, "learning_rate": 1.273743245004212e-06, "loss": 0.7438, "step": 21931 }, { "epoch": 0.774677798921802, "grad_norm": 2.4374794960021973, "learning_rate": 1.273361862560008e-06, "loss": 0.7692, "step": 21932 }, { "epoch": 0.7747131207255098, "grad_norm": 1.7838914394378662, "learning_rate": 1.2729805288893454e-06, "loss": 0.7376, "step": 21933 }, { "epoch": 0.7747484425292177, "grad_norm": 1.5154609680175781, "learning_rate": 1.2725992439972113e-06, "loss": 0.7542, "step": 21934 }, { "epoch": 0.7747837643329256, "grad_norm": 1.5260567665100098, "learning_rate": 1.2722180078885976e-06, "loss": 0.7344, "step": 21935 }, { "epoch": 0.7748190861366335, "grad_norm": 1.9953118562698364, "learning_rate": 1.2718368205684955e-06, "loss": 0.7584, "step": 21936 }, { "epoch": 0.7748544079403414, "grad_norm": 2.702284097671509, "learning_rate": 1.2714556820418904e-06, "loss": 0.7707, "step": 21937 }, { "epoch": 0.7748897297440493, "grad_norm": 1.7264320850372314, "learning_rate": 1.271074592313773e-06, "loss": 0.799, "step": 21938 }, { "epoch": 0.7749250515477573, "grad_norm": 1.801164984703064, "learning_rate": 1.2706935513891316e-06, "loss": 0.7357, "step": 21939 }, { "epoch": 0.7749603733514652, "grad_norm": 1.59174644947052, "learning_rate": 1.2703125592729503e-06, "loss": 0.7866, "step": 21940 }, { "epoch": 0.7749956951551731, "grad_norm": 1.5952692031860352, "learning_rate": 1.2699316159702175e-06, "loss": 0.755, "step": 21941 }, { "epoch": 0.775031016958881, "grad_norm": 1.5692470073699951, "learning_rate": 1.2695507214859199e-06, "loss": 0.7658, "step": 21942 }, { "epoch": 0.7750663387625889, "grad_norm": 1.6675444841384888, "learning_rate": 1.2691698758250393e-06, "loss": 0.7409, "step": 21943 }, { "epoch": 0.7751016605662968, "grad_norm": 1.5244587659835815, "learning_rate": 1.2687890789925627e-06, "loss": 0.7946, "step": 21944 }, { "epoch": 0.7751369823700047, "grad_norm": 1.531585693359375, "learning_rate": 1.268408330993474e-06, "loss": 0.7428, "step": 21945 }, { "epoch": 0.7751723041737126, "grad_norm": 1.6400610208511353, "learning_rate": 1.268027631832755e-06, "loss": 0.7444, "step": 21946 }, { "epoch": 0.7752076259774205, "grad_norm": 1.6732699871063232, "learning_rate": 1.267646981515388e-06, "loss": 0.7825, "step": 21947 }, { "epoch": 0.7752429477811285, "grad_norm": 1.6383466720581055, "learning_rate": 1.2672663800463564e-06, "loss": 0.7832, "step": 21948 }, { "epoch": 0.7752782695848364, "grad_norm": 1.563563585281372, "learning_rate": 1.2668858274306434e-06, "loss": 0.7685, "step": 21949 }, { "epoch": 0.7753135913885443, "grad_norm": 1.490574598312378, "learning_rate": 1.266505323673224e-06, "loss": 0.7581, "step": 21950 }, { "epoch": 0.7753489131922522, "grad_norm": 1.78864324092865, "learning_rate": 1.2661248687790806e-06, "loss": 0.7775, "step": 21951 }, { "epoch": 0.7753842349959601, "grad_norm": 1.7625072002410889, "learning_rate": 1.265744462753195e-06, "loss": 0.7381, "step": 21952 }, { "epoch": 0.775419556799668, "grad_norm": 1.6309014558792114, "learning_rate": 1.2653641056005422e-06, "loss": 0.745, "step": 21953 }, { "epoch": 0.7754548786033759, "grad_norm": 1.740065336227417, "learning_rate": 1.2649837973261025e-06, "loss": 0.7839, "step": 21954 }, { "epoch": 0.7754902004070838, "grad_norm": 1.6796241998672485, "learning_rate": 1.2646035379348537e-06, "loss": 0.7597, "step": 21955 }, { "epoch": 0.7755255222107917, "grad_norm": 1.5553699731826782, "learning_rate": 1.2642233274317705e-06, "loss": 0.7754, "step": 21956 }, { "epoch": 0.7755608440144997, "grad_norm": 1.7053780555725098, "learning_rate": 1.2638431658218298e-06, "loss": 0.7812, "step": 21957 }, { "epoch": 0.7755961658182076, "grad_norm": 1.564671277999878, "learning_rate": 1.2634630531100077e-06, "loss": 0.7464, "step": 21958 }, { "epoch": 0.7756314876219154, "grad_norm": 1.6416629552841187, "learning_rate": 1.2630829893012797e-06, "loss": 0.7325, "step": 21959 }, { "epoch": 0.7756668094256233, "grad_norm": 1.683849811553955, "learning_rate": 1.262702974400618e-06, "loss": 0.8072, "step": 21960 }, { "epoch": 0.7757021312293312, "grad_norm": 1.8168972730636597, "learning_rate": 1.2623230084129972e-06, "loss": 0.7911, "step": 21961 }, { "epoch": 0.7757374530330391, "grad_norm": 1.5990586280822754, "learning_rate": 1.2619430913433923e-06, "loss": 0.7348, "step": 21962 }, { "epoch": 0.775772774836747, "grad_norm": 1.742276906967163, "learning_rate": 1.261563223196771e-06, "loss": 0.7855, "step": 21963 }, { "epoch": 0.7758080966404549, "grad_norm": 1.802974820137024, "learning_rate": 1.2611834039781085e-06, "loss": 0.7824, "step": 21964 }, { "epoch": 0.7758434184441628, "grad_norm": 1.6154873371124268, "learning_rate": 1.2608036336923758e-06, "loss": 0.7594, "step": 21965 }, { "epoch": 0.7758787402478707, "grad_norm": 1.5788638591766357, "learning_rate": 1.2604239123445406e-06, "loss": 0.7714, "step": 21966 }, { "epoch": 0.7759140620515786, "grad_norm": 2.958225965499878, "learning_rate": 1.2600442399395745e-06, "loss": 0.7661, "step": 21967 }, { "epoch": 0.7759493838552866, "grad_norm": 1.6664857864379883, "learning_rate": 1.2596646164824477e-06, "loss": 0.769, "step": 21968 }, { "epoch": 0.7759847056589945, "grad_norm": 1.5538949966430664, "learning_rate": 1.2592850419781272e-06, "loss": 0.755, "step": 21969 }, { "epoch": 0.7760200274627024, "grad_norm": 1.9342525005340576, "learning_rate": 1.25890551643158e-06, "loss": 0.7973, "step": 21970 }, { "epoch": 0.7760553492664103, "grad_norm": 1.6342015266418457, "learning_rate": 1.2585260398477733e-06, "loss": 0.7588, "step": 21971 }, { "epoch": 0.7760906710701182, "grad_norm": 1.5980761051177979, "learning_rate": 1.2581466122316766e-06, "loss": 0.7481, "step": 21972 }, { "epoch": 0.7761259928738261, "grad_norm": 1.5941888093948364, "learning_rate": 1.2577672335882518e-06, "loss": 0.7536, "step": 21973 }, { "epoch": 0.776161314677534, "grad_norm": 1.8374890089035034, "learning_rate": 1.2573879039224662e-06, "loss": 0.801, "step": 21974 }, { "epoch": 0.7761966364812419, "grad_norm": 1.8045955896377563, "learning_rate": 1.2570086232392858e-06, "loss": 0.7425, "step": 21975 }, { "epoch": 0.7762319582849498, "grad_norm": 1.6783181428909302, "learning_rate": 1.2566293915436712e-06, "loss": 0.8093, "step": 21976 }, { "epoch": 0.7762672800886578, "grad_norm": 1.8481965065002441, "learning_rate": 1.2562502088405876e-06, "loss": 0.7595, "step": 21977 }, { "epoch": 0.7763026018923657, "grad_norm": 1.5999960899353027, "learning_rate": 1.2558710751349983e-06, "loss": 0.7438, "step": 21978 }, { "epoch": 0.7763379236960736, "grad_norm": 1.618586540222168, "learning_rate": 1.255491990431864e-06, "loss": 0.765, "step": 21979 }, { "epoch": 0.7763732454997815, "grad_norm": 1.6262542009353638, "learning_rate": 1.2551129547361462e-06, "loss": 0.7688, "step": 21980 }, { "epoch": 0.7764085673034894, "grad_norm": 1.6858413219451904, "learning_rate": 1.2547339680528076e-06, "loss": 0.7617, "step": 21981 }, { "epoch": 0.7764438891071973, "grad_norm": 1.617993950843811, "learning_rate": 1.2543550303868058e-06, "loss": 0.7747, "step": 21982 }, { "epoch": 0.7764792109109052, "grad_norm": 1.8113234043121338, "learning_rate": 1.2539761417431007e-06, "loss": 0.7605, "step": 21983 }, { "epoch": 0.7765145327146131, "grad_norm": 1.7443716526031494, "learning_rate": 1.2535973021266534e-06, "loss": 0.7874, "step": 21984 }, { "epoch": 0.7765498545183209, "grad_norm": 1.6286299228668213, "learning_rate": 1.2532185115424183e-06, "loss": 0.7645, "step": 21985 }, { "epoch": 0.7765851763220288, "grad_norm": 1.7627867460250854, "learning_rate": 1.2528397699953571e-06, "loss": 0.7492, "step": 21986 }, { "epoch": 0.7766204981257367, "grad_norm": 1.6518782377243042, "learning_rate": 1.2524610774904234e-06, "loss": 0.761, "step": 21987 }, { "epoch": 0.7766558199294447, "grad_norm": 1.5489987134933472, "learning_rate": 1.2520824340325755e-06, "loss": 0.7533, "step": 21988 }, { "epoch": 0.7766911417331526, "grad_norm": 1.7917057275772095, "learning_rate": 1.2517038396267668e-06, "loss": 0.7551, "step": 21989 }, { "epoch": 0.7767264635368605, "grad_norm": 1.5864866971969604, "learning_rate": 1.2513252942779535e-06, "loss": 0.7773, "step": 21990 }, { "epoch": 0.7767617853405684, "grad_norm": 1.5735780000686646, "learning_rate": 1.2509467979910923e-06, "loss": 0.7317, "step": 21991 }, { "epoch": 0.7767971071442763, "grad_norm": 1.6131116151809692, "learning_rate": 1.2505683507711326e-06, "loss": 0.75, "step": 21992 }, { "epoch": 0.7768324289479842, "grad_norm": 1.612177848815918, "learning_rate": 1.2501899526230292e-06, "loss": 0.7783, "step": 21993 }, { "epoch": 0.7768677507516921, "grad_norm": 2.2784228324890137, "learning_rate": 1.249811603551737e-06, "loss": 0.7781, "step": 21994 }, { "epoch": 0.7769030725554, "grad_norm": 1.683606505393982, "learning_rate": 1.2494333035622036e-06, "loss": 0.7891, "step": 21995 }, { "epoch": 0.7769383943591079, "grad_norm": 1.886409044265747, "learning_rate": 1.2490550526593826e-06, "loss": 0.767, "step": 21996 }, { "epoch": 0.7769737161628159, "grad_norm": 1.5727931261062622, "learning_rate": 1.248676850848225e-06, "loss": 0.777, "step": 21997 }, { "epoch": 0.7770090379665238, "grad_norm": 1.767694354057312, "learning_rate": 1.248298698133678e-06, "loss": 0.8185, "step": 21998 }, { "epoch": 0.7770443597702317, "grad_norm": 1.696958303451538, "learning_rate": 1.2479205945206929e-06, "loss": 0.7532, "step": 21999 }, { "epoch": 0.7770796815739396, "grad_norm": 1.6758559942245483, "learning_rate": 1.2475425400142188e-06, "loss": 0.7554, "step": 22000 }, { "epoch": 0.7771150033776475, "grad_norm": 1.7692837715148926, "learning_rate": 1.2471645346192012e-06, "loss": 0.7724, "step": 22001 }, { "epoch": 0.7771503251813554, "grad_norm": 1.7788894176483154, "learning_rate": 1.246786578340589e-06, "loss": 0.7873, "step": 22002 }, { "epoch": 0.7771856469850633, "grad_norm": 1.9659841060638428, "learning_rate": 1.2464086711833295e-06, "loss": 0.7868, "step": 22003 }, { "epoch": 0.7772209687887712, "grad_norm": 1.4776123762130737, "learning_rate": 1.246030813152367e-06, "loss": 0.7453, "step": 22004 }, { "epoch": 0.7772562905924791, "grad_norm": 1.5944724082946777, "learning_rate": 1.2456530042526483e-06, "loss": 0.7646, "step": 22005 }, { "epoch": 0.777291612396187, "grad_norm": 1.8746025562286377, "learning_rate": 1.245275244489116e-06, "loss": 0.7225, "step": 22006 }, { "epoch": 0.777326934199895, "grad_norm": 1.5666987895965576, "learning_rate": 1.2448975338667173e-06, "loss": 0.7591, "step": 22007 }, { "epoch": 0.7773622560036029, "grad_norm": 1.7886630296707153, "learning_rate": 1.2445198723903918e-06, "loss": 0.7727, "step": 22008 }, { "epoch": 0.7773975778073108, "grad_norm": 1.7326760292053223, "learning_rate": 1.2441422600650848e-06, "loss": 0.7849, "step": 22009 }, { "epoch": 0.7774328996110187, "grad_norm": 5.809010028839111, "learning_rate": 1.243764696895739e-06, "loss": 0.7501, "step": 22010 }, { "epoch": 0.7774682214147265, "grad_norm": 1.9203742742538452, "learning_rate": 1.2433871828872934e-06, "loss": 0.8138, "step": 22011 }, { "epoch": 0.7775035432184344, "grad_norm": 1.890945553779602, "learning_rate": 1.2430097180446904e-06, "loss": 0.82, "step": 22012 }, { "epoch": 0.7775388650221423, "grad_norm": 2.124677896499634, "learning_rate": 1.242632302372871e-06, "loss": 0.7853, "step": 22013 }, { "epoch": 0.7775741868258502, "grad_norm": 1.6661771535873413, "learning_rate": 1.2422549358767721e-06, "loss": 0.7623, "step": 22014 }, { "epoch": 0.7776095086295581, "grad_norm": 1.6682865619659424, "learning_rate": 1.2418776185613351e-06, "loss": 0.7588, "step": 22015 }, { "epoch": 0.777644830433266, "grad_norm": 1.1120437383651733, "learning_rate": 1.2415003504314965e-06, "loss": 0.5907, "step": 22016 }, { "epoch": 0.777680152236974, "grad_norm": 1.6013789176940918, "learning_rate": 1.2411231314921967e-06, "loss": 0.7343, "step": 22017 }, { "epoch": 0.7777154740406819, "grad_norm": 1.6799969673156738, "learning_rate": 1.240745961748369e-06, "loss": 0.7617, "step": 22018 }, { "epoch": 0.7777507958443898, "grad_norm": 1.8955870866775513, "learning_rate": 1.2403688412049519e-06, "loss": 0.7927, "step": 22019 }, { "epoch": 0.7777861176480977, "grad_norm": 1.7118791341781616, "learning_rate": 1.2399917698668818e-06, "loss": 0.7629, "step": 22020 }, { "epoch": 0.7778214394518056, "grad_norm": 1.7186692953109741, "learning_rate": 1.2396147477390913e-06, "loss": 0.7696, "step": 22021 }, { "epoch": 0.7778567612555135, "grad_norm": 3.0887675285339355, "learning_rate": 1.239237774826516e-06, "loss": 0.7662, "step": 22022 }, { "epoch": 0.7778920830592214, "grad_norm": 1.62141752243042, "learning_rate": 1.2388608511340932e-06, "loss": 0.7253, "step": 22023 }, { "epoch": 0.7779274048629293, "grad_norm": 2.128530502319336, "learning_rate": 1.238483976666749e-06, "loss": 0.7527, "step": 22024 }, { "epoch": 0.7779627266666372, "grad_norm": 1.6757007837295532, "learning_rate": 1.2381071514294197e-06, "loss": 0.8026, "step": 22025 }, { "epoch": 0.7779980484703451, "grad_norm": 1.8049535751342773, "learning_rate": 1.237730375427037e-06, "loss": 0.7699, "step": 22026 }, { "epoch": 0.7780333702740531, "grad_norm": 1.7026774883270264, "learning_rate": 1.2373536486645331e-06, "loss": 0.7645, "step": 22027 }, { "epoch": 0.778068692077761, "grad_norm": 1.6493221521377563, "learning_rate": 1.236976971146836e-06, "loss": 0.7596, "step": 22028 }, { "epoch": 0.7781040138814689, "grad_norm": 1.6794230937957764, "learning_rate": 1.2366003428788764e-06, "loss": 0.7878, "step": 22029 }, { "epoch": 0.7781393356851768, "grad_norm": 1.8089710474014282, "learning_rate": 1.236223763865586e-06, "loss": 0.7354, "step": 22030 }, { "epoch": 0.7781746574888847, "grad_norm": 1.602019190788269, "learning_rate": 1.2358472341118904e-06, "loss": 0.7566, "step": 22031 }, { "epoch": 0.7782099792925926, "grad_norm": 1.805979609489441, "learning_rate": 1.2354707536227178e-06, "loss": 0.7871, "step": 22032 }, { "epoch": 0.7782453010963005, "grad_norm": 1.7104355096817017, "learning_rate": 1.2350943224029988e-06, "loss": 0.7349, "step": 22033 }, { "epoch": 0.7782806229000084, "grad_norm": 1.5257381200790405, "learning_rate": 1.2347179404576554e-06, "loss": 0.7603, "step": 22034 }, { "epoch": 0.7783159447037163, "grad_norm": 1.6544958353042603, "learning_rate": 1.2343416077916158e-06, "loss": 0.7794, "step": 22035 }, { "epoch": 0.7783512665074243, "grad_norm": 1.6724367141723633, "learning_rate": 1.2339653244098072e-06, "loss": 0.7543, "step": 22036 }, { "epoch": 0.778386588311132, "grad_norm": 1.6278637647628784, "learning_rate": 1.2335890903171505e-06, "loss": 0.7823, "step": 22037 }, { "epoch": 0.77842191011484, "grad_norm": 1.6968519687652588, "learning_rate": 1.233212905518572e-06, "loss": 0.7786, "step": 22038 }, { "epoch": 0.7784572319185479, "grad_norm": 1.5411441326141357, "learning_rate": 1.2328367700189965e-06, "loss": 0.7493, "step": 22039 }, { "epoch": 0.7784925537222558, "grad_norm": 1.7391482591629028, "learning_rate": 1.2324606838233438e-06, "loss": 0.7614, "step": 22040 }, { "epoch": 0.7785278755259637, "grad_norm": 1.7503212690353394, "learning_rate": 1.2320846469365372e-06, "loss": 0.7678, "step": 22041 }, { "epoch": 0.7785631973296716, "grad_norm": 0.9628947377204895, "learning_rate": 1.2317086593635003e-06, "loss": 0.5706, "step": 22042 }, { "epoch": 0.7785985191333795, "grad_norm": 1.586862325668335, "learning_rate": 1.2313327211091518e-06, "loss": 0.7841, "step": 22043 }, { "epoch": 0.7786338409370874, "grad_norm": 1.7338639497756958, "learning_rate": 1.230956832178411e-06, "loss": 0.7565, "step": 22044 }, { "epoch": 0.7786691627407953, "grad_norm": 1.7105826139450073, "learning_rate": 1.2305809925761986e-06, "loss": 0.7366, "step": 22045 }, { "epoch": 0.7787044845445033, "grad_norm": 1.7478818893432617, "learning_rate": 1.2302052023074345e-06, "loss": 0.7682, "step": 22046 }, { "epoch": 0.7787398063482112, "grad_norm": 1.9375051259994507, "learning_rate": 1.229829461377035e-06, "loss": 0.7836, "step": 22047 }, { "epoch": 0.7787751281519191, "grad_norm": 1.6363580226898193, "learning_rate": 1.229453769789919e-06, "loss": 0.7561, "step": 22048 }, { "epoch": 0.778810449955627, "grad_norm": 1.6782424449920654, "learning_rate": 1.2290781275510044e-06, "loss": 0.7782, "step": 22049 }, { "epoch": 0.7788457717593349, "grad_norm": 1.5923304557800293, "learning_rate": 1.2287025346652053e-06, "loss": 0.7808, "step": 22050 }, { "epoch": 0.7788810935630428, "grad_norm": 0.9527945518493652, "learning_rate": 1.2283269911374384e-06, "loss": 0.5869, "step": 22051 }, { "epoch": 0.7789164153667507, "grad_norm": 2.0035483837127686, "learning_rate": 1.2279514969726204e-06, "loss": 0.7609, "step": 22052 }, { "epoch": 0.7789517371704586, "grad_norm": 1.7637499570846558, "learning_rate": 1.2275760521756624e-06, "loss": 0.7838, "step": 22053 }, { "epoch": 0.7789870589741665, "grad_norm": 1.6566822528839111, "learning_rate": 1.22720065675148e-06, "loss": 0.7777, "step": 22054 }, { "epoch": 0.7790223807778744, "grad_norm": 1.6104685068130493, "learning_rate": 1.2268253107049876e-06, "loss": 0.8157, "step": 22055 }, { "epoch": 0.7790577025815824, "grad_norm": 1.7658519744873047, "learning_rate": 1.226450014041095e-06, "loss": 0.7568, "step": 22056 }, { "epoch": 0.7790930243852903, "grad_norm": 1.9190442562103271, "learning_rate": 1.2260747667647148e-06, "loss": 0.7843, "step": 22057 }, { "epoch": 0.7791283461889982, "grad_norm": 1.6785608530044556, "learning_rate": 1.225699568880761e-06, "loss": 0.7605, "step": 22058 }, { "epoch": 0.7791636679927061, "grad_norm": 1.5674291849136353, "learning_rate": 1.2253244203941395e-06, "loss": 0.7996, "step": 22059 }, { "epoch": 0.779198989796414, "grad_norm": 1.8429733514785767, "learning_rate": 1.2249493213097642e-06, "loss": 0.7521, "step": 22060 }, { "epoch": 0.7792343116001219, "grad_norm": 1.5780184268951416, "learning_rate": 1.224574271632541e-06, "loss": 0.7753, "step": 22061 }, { "epoch": 0.7792696334038298, "grad_norm": 1.5507960319519043, "learning_rate": 1.2241992713673811e-06, "loss": 0.7643, "step": 22062 }, { "epoch": 0.7793049552075376, "grad_norm": 4.031674385070801, "learning_rate": 1.2238243205191907e-06, "loss": 0.7577, "step": 22063 }, { "epoch": 0.7793402770112455, "grad_norm": 1.6582860946655273, "learning_rate": 1.223449419092877e-06, "loss": 0.7925, "step": 22064 }, { "epoch": 0.7793755988149534, "grad_norm": 1.7176342010498047, "learning_rate": 1.2230745670933491e-06, "loss": 0.7678, "step": 22065 }, { "epoch": 0.7794109206186614, "grad_norm": 1.767727255821228, "learning_rate": 1.22269976452551e-06, "loss": 0.8011, "step": 22066 }, { "epoch": 0.7794462424223693, "grad_norm": 1.6569534540176392, "learning_rate": 1.2223250113942664e-06, "loss": 0.7395, "step": 22067 }, { "epoch": 0.7794815642260772, "grad_norm": 1.6596567630767822, "learning_rate": 1.2219503077045243e-06, "loss": 0.8152, "step": 22068 }, { "epoch": 0.7795168860297851, "grad_norm": 1.711728811264038, "learning_rate": 1.221575653461185e-06, "loss": 0.758, "step": 22069 }, { "epoch": 0.779552207833493, "grad_norm": 2.304051399230957, "learning_rate": 1.2212010486691527e-06, "loss": 0.7446, "step": 22070 }, { "epoch": 0.7795875296372009, "grad_norm": 1.719698429107666, "learning_rate": 1.220826493333333e-06, "loss": 0.7649, "step": 22071 }, { "epoch": 0.7796228514409088, "grad_norm": 1.6851823329925537, "learning_rate": 1.2204519874586235e-06, "loss": 0.7978, "step": 22072 }, { "epoch": 0.7796581732446167, "grad_norm": 4.544886112213135, "learning_rate": 1.2200775310499285e-06, "loss": 0.7672, "step": 22073 }, { "epoch": 0.7796934950483246, "grad_norm": 1.7298418283462524, "learning_rate": 1.2197031241121483e-06, "loss": 0.8267, "step": 22074 }, { "epoch": 0.7797288168520325, "grad_norm": 1.7189562320709229, "learning_rate": 1.219328766650184e-06, "loss": 0.7427, "step": 22075 }, { "epoch": 0.7797641386557405, "grad_norm": 1.766911268234253, "learning_rate": 1.218954458668933e-06, "loss": 0.8104, "step": 22076 }, { "epoch": 0.7797994604594484, "grad_norm": 1.88929283618927, "learning_rate": 1.218580200173295e-06, "loss": 0.748, "step": 22077 }, { "epoch": 0.7798347822631563, "grad_norm": 1.6893268823623657, "learning_rate": 1.2182059911681698e-06, "loss": 0.7695, "step": 22078 }, { "epoch": 0.7798701040668642, "grad_norm": 2.025270700454712, "learning_rate": 1.2178318316584542e-06, "loss": 0.7672, "step": 22079 }, { "epoch": 0.7799054258705721, "grad_norm": 1.7384918928146362, "learning_rate": 1.217457721649043e-06, "loss": 0.7595, "step": 22080 }, { "epoch": 0.77994074767428, "grad_norm": 1.6004290580749512, "learning_rate": 1.2170836611448355e-06, "loss": 0.767, "step": 22081 }, { "epoch": 0.7799760694779879, "grad_norm": 1.7866616249084473, "learning_rate": 1.2167096501507246e-06, "loss": 0.7418, "step": 22082 }, { "epoch": 0.7800113912816958, "grad_norm": 1.682323694229126, "learning_rate": 1.2163356886716065e-06, "loss": 0.7935, "step": 22083 }, { "epoch": 0.7800467130854037, "grad_norm": 1.6398751735687256, "learning_rate": 1.2159617767123754e-06, "loss": 0.8073, "step": 22084 }, { "epoch": 0.7800820348891117, "grad_norm": 1.806336760520935, "learning_rate": 1.215587914277927e-06, "loss": 0.7354, "step": 22085 }, { "epoch": 0.7801173566928196, "grad_norm": 1.6909271478652954, "learning_rate": 1.215214101373151e-06, "loss": 0.7736, "step": 22086 }, { "epoch": 0.7801526784965275, "grad_norm": 1.7388683557510376, "learning_rate": 1.2148403380029417e-06, "loss": 0.7998, "step": 22087 }, { "epoch": 0.7801880003002354, "grad_norm": 1.9812828302383423, "learning_rate": 1.2144666241721919e-06, "loss": 0.8185, "step": 22088 }, { "epoch": 0.7802233221039432, "grad_norm": 1.5928807258605957, "learning_rate": 1.2140929598857893e-06, "loss": 0.7779, "step": 22089 }, { "epoch": 0.7802586439076511, "grad_norm": 1.6805570125579834, "learning_rate": 1.2137193451486269e-06, "loss": 0.7541, "step": 22090 }, { "epoch": 0.780293965711359, "grad_norm": 1.6457016468048096, "learning_rate": 1.2133457799655956e-06, "loss": 0.755, "step": 22091 }, { "epoch": 0.7803292875150669, "grad_norm": 1.758583903312683, "learning_rate": 1.212972264341581e-06, "loss": 0.7695, "step": 22092 }, { "epoch": 0.7803646093187748, "grad_norm": 1.714363694190979, "learning_rate": 1.2125987982814736e-06, "loss": 0.761, "step": 22093 }, { "epoch": 0.7803999311224827, "grad_norm": 1.7077946662902832, "learning_rate": 1.212225381790163e-06, "loss": 0.7578, "step": 22094 }, { "epoch": 0.7804352529261906, "grad_norm": 1.7072901725769043, "learning_rate": 1.2118520148725327e-06, "loss": 0.7978, "step": 22095 }, { "epoch": 0.7804705747298986, "grad_norm": 1.6038932800292969, "learning_rate": 1.2114786975334714e-06, "loss": 0.7384, "step": 22096 }, { "epoch": 0.7805058965336065, "grad_norm": 1.538458228111267, "learning_rate": 1.2111054297778658e-06, "loss": 0.7666, "step": 22097 }, { "epoch": 0.7805412183373144, "grad_norm": 1.5754529237747192, "learning_rate": 1.2107322116106002e-06, "loss": 0.7278, "step": 22098 }, { "epoch": 0.7805765401410223, "grad_norm": 1.7178354263305664, "learning_rate": 1.210359043036558e-06, "loss": 0.7608, "step": 22099 }, { "epoch": 0.7806118619447302, "grad_norm": 1.5507441759109497, "learning_rate": 1.2099859240606243e-06, "loss": 0.7455, "step": 22100 }, { "epoch": 0.7806471837484381, "grad_norm": 1.7920494079589844, "learning_rate": 1.2096128546876835e-06, "loss": 0.7967, "step": 22101 }, { "epoch": 0.780682505552146, "grad_norm": 1.7242375612258911, "learning_rate": 1.209239834922616e-06, "loss": 0.7503, "step": 22102 }, { "epoch": 0.7807178273558539, "grad_norm": 1.6554715633392334, "learning_rate": 1.2088668647703045e-06, "loss": 0.7331, "step": 22103 }, { "epoch": 0.7807531491595618, "grad_norm": 1.7229450941085815, "learning_rate": 1.2084939442356325e-06, "loss": 0.7703, "step": 22104 }, { "epoch": 0.7807884709632698, "grad_norm": 1.9138635396957397, "learning_rate": 1.2081210733234777e-06, "loss": 0.7509, "step": 22105 }, { "epoch": 0.7808237927669777, "grad_norm": 1.5043480396270752, "learning_rate": 1.207748252038722e-06, "loss": 0.7532, "step": 22106 }, { "epoch": 0.7808591145706856, "grad_norm": 1.8025174140930176, "learning_rate": 1.2073754803862448e-06, "loss": 0.7967, "step": 22107 }, { "epoch": 0.7808944363743935, "grad_norm": 1.7442809343338013, "learning_rate": 1.2070027583709232e-06, "loss": 0.735, "step": 22108 }, { "epoch": 0.7809297581781014, "grad_norm": 1.7014092206954956, "learning_rate": 1.2066300859976365e-06, "loss": 0.7399, "step": 22109 }, { "epoch": 0.7809650799818093, "grad_norm": 1.8761539459228516, "learning_rate": 1.2062574632712637e-06, "loss": 0.7605, "step": 22110 }, { "epoch": 0.7810004017855172, "grad_norm": 1.7971441745758057, "learning_rate": 1.2058848901966791e-06, "loss": 0.7823, "step": 22111 }, { "epoch": 0.7810357235892251, "grad_norm": 1.5879417657852173, "learning_rate": 1.2055123667787595e-06, "loss": 0.7567, "step": 22112 }, { "epoch": 0.781071045392933, "grad_norm": 1.9334782361984253, "learning_rate": 1.2051398930223817e-06, "loss": 0.735, "step": 22113 }, { "epoch": 0.781106367196641, "grad_norm": 1.6211329698562622, "learning_rate": 1.2047674689324186e-06, "loss": 0.804, "step": 22114 }, { "epoch": 0.7811416890003487, "grad_norm": 1.6052417755126953, "learning_rate": 1.2043950945137457e-06, "loss": 0.7598, "step": 22115 }, { "epoch": 0.7811770108040567, "grad_norm": 1.6423900127410889, "learning_rate": 1.204022769771237e-06, "loss": 0.7518, "step": 22116 }, { "epoch": 0.7812123326077646, "grad_norm": 1.7027685642242432, "learning_rate": 1.2036504947097655e-06, "loss": 0.75, "step": 22117 }, { "epoch": 0.7812476544114725, "grad_norm": 1.7900527715682983, "learning_rate": 1.203278269334201e-06, "loss": 0.7586, "step": 22118 }, { "epoch": 0.7812829762151804, "grad_norm": 1.8587477207183838, "learning_rate": 1.2029060936494163e-06, "loss": 0.7865, "step": 22119 }, { "epoch": 0.7813182980188883, "grad_norm": 1.6343903541564941, "learning_rate": 1.2025339676602848e-06, "loss": 0.7439, "step": 22120 }, { "epoch": 0.7813536198225962, "grad_norm": 1.5883193016052246, "learning_rate": 1.2021618913716737e-06, "loss": 0.7167, "step": 22121 }, { "epoch": 0.7813889416263041, "grad_norm": 1.0494896173477173, "learning_rate": 1.2017898647884536e-06, "loss": 0.6118, "step": 22122 }, { "epoch": 0.781424263430012, "grad_norm": 1.7989113330841064, "learning_rate": 1.2014178879154948e-06, "loss": 0.709, "step": 22123 }, { "epoch": 0.7814595852337199, "grad_norm": 1.7632033824920654, "learning_rate": 1.2010459607576636e-06, "loss": 0.7568, "step": 22124 }, { "epoch": 0.7814949070374279, "grad_norm": 1.662164568901062, "learning_rate": 1.2006740833198287e-06, "loss": 0.7689, "step": 22125 }, { "epoch": 0.7815302288411358, "grad_norm": 2.6653964519500732, "learning_rate": 1.2003022556068589e-06, "loss": 0.7797, "step": 22126 }, { "epoch": 0.7815655506448437, "grad_norm": 1.900673747062683, "learning_rate": 1.199930477623617e-06, "loss": 0.7821, "step": 22127 }, { "epoch": 0.7816008724485516, "grad_norm": 1.7893242835998535, "learning_rate": 1.199558749374971e-06, "loss": 0.7514, "step": 22128 }, { "epoch": 0.7816361942522595, "grad_norm": 1.587931752204895, "learning_rate": 1.199187070865787e-06, "loss": 0.7377, "step": 22129 }, { "epoch": 0.7816715160559674, "grad_norm": 1.6820931434631348, "learning_rate": 1.198815442100927e-06, "loss": 0.773, "step": 22130 }, { "epoch": 0.7817068378596753, "grad_norm": 2.0087904930114746, "learning_rate": 1.1984438630852552e-06, "loss": 0.7645, "step": 22131 }, { "epoch": 0.7817421596633832, "grad_norm": 1.5982087850570679, "learning_rate": 1.1980723338236361e-06, "loss": 0.77, "step": 22132 }, { "epoch": 0.7817774814670911, "grad_norm": 1.8178690671920776, "learning_rate": 1.1977008543209329e-06, "loss": 0.7839, "step": 22133 }, { "epoch": 0.781812803270799, "grad_norm": 1.6938117742538452, "learning_rate": 1.197329424582006e-06, "loss": 0.7706, "step": 22134 }, { "epoch": 0.781848125074507, "grad_norm": 1.6583658456802368, "learning_rate": 1.1969580446117158e-06, "loss": 0.7611, "step": 22135 }, { "epoch": 0.7818834468782149, "grad_norm": 1.6020023822784424, "learning_rate": 1.1965867144149246e-06, "loss": 0.7734, "step": 22136 }, { "epoch": 0.7819187686819228, "grad_norm": 1.665306568145752, "learning_rate": 1.1962154339964905e-06, "loss": 0.7827, "step": 22137 }, { "epoch": 0.7819540904856307, "grad_norm": 1.7046247720718384, "learning_rate": 1.1958442033612737e-06, "loss": 0.7533, "step": 22138 }, { "epoch": 0.7819894122893386, "grad_norm": 1.6546088457107544, "learning_rate": 1.1954730225141342e-06, "loss": 0.7501, "step": 22139 }, { "epoch": 0.7820247340930465, "grad_norm": 1.7777942419052124, "learning_rate": 1.1951018914599272e-06, "loss": 0.7747, "step": 22140 }, { "epoch": 0.7820600558967543, "grad_norm": 1.6567338705062866, "learning_rate": 1.1947308102035117e-06, "loss": 0.76, "step": 22141 }, { "epoch": 0.7820953777004622, "grad_norm": 1.4763191938400269, "learning_rate": 1.1943597787497435e-06, "loss": 0.7554, "step": 22142 }, { "epoch": 0.7821306995041701, "grad_norm": 1.6497479677200317, "learning_rate": 1.1939887971034814e-06, "loss": 0.8029, "step": 22143 }, { "epoch": 0.782166021307878, "grad_norm": 1.754292607307434, "learning_rate": 1.1936178652695763e-06, "loss": 0.7629, "step": 22144 }, { "epoch": 0.782201343111586, "grad_norm": 1.015061855316162, "learning_rate": 1.1932469832528854e-06, "loss": 0.5743, "step": 22145 }, { "epoch": 0.7822366649152939, "grad_norm": 1.6245194673538208, "learning_rate": 1.1928761510582636e-06, "loss": 0.7451, "step": 22146 }, { "epoch": 0.7822719867190018, "grad_norm": 1.5722030401229858, "learning_rate": 1.1925053686905618e-06, "loss": 0.7513, "step": 22147 }, { "epoch": 0.7823073085227097, "grad_norm": 1.8579200506210327, "learning_rate": 1.1921346361546338e-06, "loss": 0.7883, "step": 22148 }, { "epoch": 0.7823426303264176, "grad_norm": 1.6883846521377563, "learning_rate": 1.1917639534553337e-06, "loss": 0.759, "step": 22149 }, { "epoch": 0.7823779521301255, "grad_norm": 1.5069082975387573, "learning_rate": 1.1913933205975092e-06, "loss": 0.7111, "step": 22150 }, { "epoch": 0.7824132739338334, "grad_norm": 1.7224558591842651, "learning_rate": 1.1910227375860133e-06, "loss": 0.7666, "step": 22151 }, { "epoch": 0.7824485957375413, "grad_norm": 1.7931814193725586, "learning_rate": 1.1906522044256973e-06, "loss": 0.7795, "step": 22152 }, { "epoch": 0.7824839175412492, "grad_norm": 1.7191259860992432, "learning_rate": 1.190281721121409e-06, "loss": 0.7763, "step": 22153 }, { "epoch": 0.7825192393449572, "grad_norm": 1.63479745388031, "learning_rate": 1.1899112876779957e-06, "loss": 0.737, "step": 22154 }, { "epoch": 0.7825545611486651, "grad_norm": 1.6130098104476929, "learning_rate": 1.1895409041003075e-06, "loss": 0.7728, "step": 22155 }, { "epoch": 0.782589882952373, "grad_norm": 1.7061433792114258, "learning_rate": 1.1891705703931933e-06, "loss": 0.7546, "step": 22156 }, { "epoch": 0.7826252047560809, "grad_norm": 1.7762210369110107, "learning_rate": 1.1888002865614968e-06, "loss": 0.7825, "step": 22157 }, { "epoch": 0.7826605265597888, "grad_norm": 1.6705502271652222, "learning_rate": 1.1884300526100656e-06, "loss": 0.7511, "step": 22158 }, { "epoch": 0.7826958483634967, "grad_norm": 1.6701314449310303, "learning_rate": 1.1880598685437467e-06, "loss": 0.776, "step": 22159 }, { "epoch": 0.7827311701672046, "grad_norm": 1.571265459060669, "learning_rate": 1.187689734367382e-06, "loss": 0.7838, "step": 22160 }, { "epoch": 0.7827664919709125, "grad_norm": 0.887024998664856, "learning_rate": 1.1873196500858182e-06, "loss": 0.5606, "step": 22161 }, { "epoch": 0.7828018137746204, "grad_norm": 1.859152913093567, "learning_rate": 1.1869496157038989e-06, "loss": 0.7744, "step": 22162 }, { "epoch": 0.7828371355783283, "grad_norm": 1.7671585083007812, "learning_rate": 1.1865796312264654e-06, "loss": 0.7781, "step": 22163 }, { "epoch": 0.7828724573820363, "grad_norm": 1.5581611394882202, "learning_rate": 1.1862096966583604e-06, "loss": 0.7714, "step": 22164 }, { "epoch": 0.7829077791857442, "grad_norm": 1.7419081926345825, "learning_rate": 1.1858398120044274e-06, "loss": 0.7768, "step": 22165 }, { "epoch": 0.7829431009894521, "grad_norm": 2.0524094104766846, "learning_rate": 1.1854699772695049e-06, "loss": 0.7412, "step": 22166 }, { "epoch": 0.7829784227931599, "grad_norm": 1.7816357612609863, "learning_rate": 1.1851001924584339e-06, "loss": 0.797, "step": 22167 }, { "epoch": 0.7830137445968678, "grad_norm": 1.6069796085357666, "learning_rate": 1.184730457576056e-06, "loss": 0.7928, "step": 22168 }, { "epoch": 0.7830490664005757, "grad_norm": 1.6949362754821777, "learning_rate": 1.1843607726272072e-06, "loss": 0.7605, "step": 22169 }, { "epoch": 0.7830843882042836, "grad_norm": 1.660539984703064, "learning_rate": 1.1839911376167274e-06, "loss": 0.7744, "step": 22170 }, { "epoch": 0.7831197100079915, "grad_norm": 1.6983754634857178, "learning_rate": 1.1836215525494555e-06, "loss": 0.807, "step": 22171 }, { "epoch": 0.7831550318116994, "grad_norm": 1.8163175582885742, "learning_rate": 1.1832520174302276e-06, "loss": 0.7851, "step": 22172 }, { "epoch": 0.7831903536154073, "grad_norm": 1.6621276140213013, "learning_rate": 1.1828825322638782e-06, "loss": 0.7496, "step": 22173 }, { "epoch": 0.7832256754191153, "grad_norm": 1.6630910634994507, "learning_rate": 1.182513097055245e-06, "loss": 0.7688, "step": 22174 }, { "epoch": 0.7832609972228232, "grad_norm": 2.3332884311676025, "learning_rate": 1.182143711809164e-06, "loss": 0.7757, "step": 22175 }, { "epoch": 0.7832963190265311, "grad_norm": 1.697634220123291, "learning_rate": 1.1817743765304667e-06, "loss": 0.7874, "step": 22176 }, { "epoch": 0.783331640830239, "grad_norm": 1.568215012550354, "learning_rate": 1.1814050912239894e-06, "loss": 0.7771, "step": 22177 }, { "epoch": 0.7833669626339469, "grad_norm": 1.5629276037216187, "learning_rate": 1.181035855894565e-06, "loss": 0.7321, "step": 22178 }, { "epoch": 0.7834022844376548, "grad_norm": 1.5803502798080444, "learning_rate": 1.1806666705470242e-06, "loss": 0.7684, "step": 22179 }, { "epoch": 0.7834376062413627, "grad_norm": 1.7485377788543701, "learning_rate": 1.1802975351862005e-06, "loss": 0.7853, "step": 22180 }, { "epoch": 0.7834729280450706, "grad_norm": 1.6517810821533203, "learning_rate": 1.179928449816926e-06, "loss": 0.7833, "step": 22181 }, { "epoch": 0.7835082498487785, "grad_norm": 2.2883052825927734, "learning_rate": 1.179559414444028e-06, "loss": 0.7853, "step": 22182 }, { "epoch": 0.7835435716524864, "grad_norm": 2.312394380569458, "learning_rate": 1.1791904290723383e-06, "loss": 0.7449, "step": 22183 }, { "epoch": 0.7835788934561944, "grad_norm": 1.6201497316360474, "learning_rate": 1.1788214937066878e-06, "loss": 0.7408, "step": 22184 }, { "epoch": 0.7836142152599023, "grad_norm": 1.5204627513885498, "learning_rate": 1.1784526083519016e-06, "loss": 0.7483, "step": 22185 }, { "epoch": 0.7836495370636102, "grad_norm": 4.087782859802246, "learning_rate": 1.1780837730128091e-06, "loss": 0.776, "step": 22186 }, { "epoch": 0.7836848588673181, "grad_norm": 1.6169992685317993, "learning_rate": 1.1777149876942394e-06, "loss": 0.7537, "step": 22187 }, { "epoch": 0.783720180671026, "grad_norm": 1.6676214933395386, "learning_rate": 1.1773462524010165e-06, "loss": 0.7616, "step": 22188 }, { "epoch": 0.7837555024747339, "grad_norm": 1.663581371307373, "learning_rate": 1.1769775671379668e-06, "loss": 0.7766, "step": 22189 }, { "epoch": 0.7837908242784418, "grad_norm": 1.7490674257278442, "learning_rate": 1.1766089319099173e-06, "loss": 0.7571, "step": 22190 }, { "epoch": 0.7838261460821497, "grad_norm": 1.7706135511398315, "learning_rate": 1.1762403467216914e-06, "loss": 0.7864, "step": 22191 }, { "epoch": 0.7838614678858576, "grad_norm": 1.8717052936553955, "learning_rate": 1.175871811578112e-06, "loss": 0.7728, "step": 22192 }, { "epoch": 0.7838967896895654, "grad_norm": 1.9577959775924683, "learning_rate": 1.1755033264840033e-06, "loss": 0.8089, "step": 22193 }, { "epoch": 0.7839321114932734, "grad_norm": 1.687570333480835, "learning_rate": 1.1751348914441896e-06, "loss": 0.7572, "step": 22194 }, { "epoch": 0.7839674332969813, "grad_norm": 1.7523044347763062, "learning_rate": 1.1747665064634894e-06, "loss": 0.7581, "step": 22195 }, { "epoch": 0.7840027551006892, "grad_norm": 1.6286693811416626, "learning_rate": 1.1743981715467267e-06, "loss": 0.7323, "step": 22196 }, { "epoch": 0.7840380769043971, "grad_norm": 1.4820780754089355, "learning_rate": 1.1740298866987233e-06, "loss": 0.7053, "step": 22197 }, { "epoch": 0.784073398708105, "grad_norm": 1.7081290483474731, "learning_rate": 1.1736616519242955e-06, "loss": 0.7817, "step": 22198 }, { "epoch": 0.7841087205118129, "grad_norm": 2.3440515995025635, "learning_rate": 1.1732934672282654e-06, "loss": 0.739, "step": 22199 }, { "epoch": 0.7841440423155208, "grad_norm": 1.7618461847305298, "learning_rate": 1.1729253326154506e-06, "loss": 0.7333, "step": 22200 }, { "epoch": 0.7841793641192287, "grad_norm": 2.0733835697174072, "learning_rate": 1.172557248090671e-06, "loss": 0.7804, "step": 22201 }, { "epoch": 0.7842146859229366, "grad_norm": 1.926360845565796, "learning_rate": 1.172189213658741e-06, "loss": 0.7901, "step": 22202 }, { "epoch": 0.7842500077266445, "grad_norm": 1.7157663106918335, "learning_rate": 1.1718212293244797e-06, "loss": 0.7455, "step": 22203 }, { "epoch": 0.7842853295303525, "grad_norm": 1.6391534805297852, "learning_rate": 1.1714532950927033e-06, "loss": 0.7598, "step": 22204 }, { "epoch": 0.7843206513340604, "grad_norm": 1.7229129076004028, "learning_rate": 1.1710854109682257e-06, "loss": 0.7742, "step": 22205 }, { "epoch": 0.7843559731377683, "grad_norm": 1.9472512006759644, "learning_rate": 1.1707175769558622e-06, "loss": 0.7675, "step": 22206 }, { "epoch": 0.7843912949414762, "grad_norm": 1.8147006034851074, "learning_rate": 1.1703497930604285e-06, "loss": 0.7759, "step": 22207 }, { "epoch": 0.7844266167451841, "grad_norm": 1.5907682180404663, "learning_rate": 1.1699820592867366e-06, "loss": 0.7375, "step": 22208 }, { "epoch": 0.784461938548892, "grad_norm": 1.673506498336792, "learning_rate": 1.169614375639599e-06, "loss": 0.755, "step": 22209 }, { "epoch": 0.7844972603525999, "grad_norm": 1.7429996728897095, "learning_rate": 1.1692467421238274e-06, "loss": 0.7982, "step": 22210 }, { "epoch": 0.7845325821563078, "grad_norm": 1.753857135772705, "learning_rate": 1.1688791587442365e-06, "loss": 0.7438, "step": 22211 }, { "epoch": 0.7845679039600157, "grad_norm": 1.725178599357605, "learning_rate": 1.1685116255056334e-06, "loss": 0.7734, "step": 22212 }, { "epoch": 0.7846032257637237, "grad_norm": 2.5391769409179688, "learning_rate": 1.1681441424128298e-06, "loss": 0.8085, "step": 22213 }, { "epoch": 0.7846385475674316, "grad_norm": 1.653709053993225, "learning_rate": 1.1677767094706367e-06, "loss": 0.7481, "step": 22214 }, { "epoch": 0.7846738693711395, "grad_norm": 1.6933025121688843, "learning_rate": 1.1674093266838605e-06, "loss": 0.775, "step": 22215 }, { "epoch": 0.7847091911748474, "grad_norm": 1.74386465549469, "learning_rate": 1.1670419940573101e-06, "loss": 0.7882, "step": 22216 }, { "epoch": 0.7847445129785553, "grad_norm": 1.7500945329666138, "learning_rate": 1.1666747115957954e-06, "loss": 0.7673, "step": 22217 }, { "epoch": 0.7847798347822632, "grad_norm": 1.6900335550308228, "learning_rate": 1.1663074793041196e-06, "loss": 0.7626, "step": 22218 }, { "epoch": 0.784815156585971, "grad_norm": 1.6933667659759521, "learning_rate": 1.1659402971870915e-06, "loss": 0.7626, "step": 22219 }, { "epoch": 0.7848504783896789, "grad_norm": 1.6953963041305542, "learning_rate": 1.1655731652495173e-06, "loss": 0.7416, "step": 22220 }, { "epoch": 0.7848858001933868, "grad_norm": 1.7215183973312378, "learning_rate": 1.1652060834961987e-06, "loss": 0.8169, "step": 22221 }, { "epoch": 0.7849211219970947, "grad_norm": 1.6034588813781738, "learning_rate": 1.1648390519319424e-06, "loss": 0.7618, "step": 22222 }, { "epoch": 0.7849564438008027, "grad_norm": 1.706992268562317, "learning_rate": 1.1644720705615536e-06, "loss": 0.7673, "step": 22223 }, { "epoch": 0.7849917656045106, "grad_norm": 1.531467318534851, "learning_rate": 1.1641051393898311e-06, "loss": 0.7488, "step": 22224 }, { "epoch": 0.7850270874082185, "grad_norm": 1.6417635679244995, "learning_rate": 1.16373825842158e-06, "loss": 0.7457, "step": 22225 }, { "epoch": 0.7850624092119264, "grad_norm": 1.9552760124206543, "learning_rate": 1.1633714276616026e-06, "loss": 0.7565, "step": 22226 }, { "epoch": 0.7850977310156343, "grad_norm": 1.6935313940048218, "learning_rate": 1.1630046471146994e-06, "loss": 0.7422, "step": 22227 }, { "epoch": 0.7851330528193422, "grad_norm": 1.7579574584960938, "learning_rate": 1.1626379167856677e-06, "loss": 0.7488, "step": 22228 }, { "epoch": 0.7851683746230501, "grad_norm": 1.8519657850265503, "learning_rate": 1.1622712366793104e-06, "loss": 0.7838, "step": 22229 }, { "epoch": 0.785203696426758, "grad_norm": 1.8454689979553223, "learning_rate": 1.1619046068004265e-06, "loss": 0.7579, "step": 22230 }, { "epoch": 0.7852390182304659, "grad_norm": 1.6570827960968018, "learning_rate": 1.161538027153813e-06, "loss": 0.7106, "step": 22231 }, { "epoch": 0.7852743400341738, "grad_norm": 0.8965268731117249, "learning_rate": 1.1611714977442674e-06, "loss": 0.567, "step": 22232 }, { "epoch": 0.7853096618378818, "grad_norm": 1.6015790700912476, "learning_rate": 1.1608050185765895e-06, "loss": 0.72, "step": 22233 }, { "epoch": 0.7853449836415897, "grad_norm": 1.8298568725585938, "learning_rate": 1.160438589655572e-06, "loss": 0.7983, "step": 22234 }, { "epoch": 0.7853803054452976, "grad_norm": 1.935433030128479, "learning_rate": 1.160072210986013e-06, "loss": 0.7996, "step": 22235 }, { "epoch": 0.7854156272490055, "grad_norm": 1.6638076305389404, "learning_rate": 1.159705882572708e-06, "loss": 0.7361, "step": 22236 }, { "epoch": 0.7854509490527134, "grad_norm": 1.8276287317276, "learning_rate": 1.1593396044204497e-06, "loss": 0.7729, "step": 22237 }, { "epoch": 0.7854862708564213, "grad_norm": 1.7981432676315308, "learning_rate": 1.1589733765340323e-06, "loss": 0.8021, "step": 22238 }, { "epoch": 0.7855215926601292, "grad_norm": 1.6302917003631592, "learning_rate": 1.1586071989182507e-06, "loss": 0.7858, "step": 22239 }, { "epoch": 0.7855569144638371, "grad_norm": 2.06302809715271, "learning_rate": 1.1582410715778946e-06, "loss": 0.788, "step": 22240 }, { "epoch": 0.785592236267545, "grad_norm": 1.6552191972732544, "learning_rate": 1.1578749945177575e-06, "loss": 0.7833, "step": 22241 }, { "epoch": 0.785627558071253, "grad_norm": 1.6159802675247192, "learning_rate": 1.157508967742631e-06, "loss": 0.775, "step": 22242 }, { "epoch": 0.7856628798749609, "grad_norm": 1.555429458618164, "learning_rate": 1.157142991257304e-06, "loss": 0.7628, "step": 22243 }, { "epoch": 0.7856982016786688, "grad_norm": 1.6322762966156006, "learning_rate": 1.1567770650665673e-06, "loss": 0.7493, "step": 22244 }, { "epoch": 0.7857335234823766, "grad_norm": 1.6555172204971313, "learning_rate": 1.1564111891752106e-06, "loss": 0.7407, "step": 22245 }, { "epoch": 0.7857688452860845, "grad_norm": 1.6069167852401733, "learning_rate": 1.156045363588022e-06, "loss": 0.7689, "step": 22246 }, { "epoch": 0.7858041670897924, "grad_norm": 1.6932978630065918, "learning_rate": 1.1556795883097882e-06, "loss": 0.7542, "step": 22247 }, { "epoch": 0.7858394888935003, "grad_norm": 1.7673414945602417, "learning_rate": 1.1553138633452966e-06, "loss": 0.7546, "step": 22248 }, { "epoch": 0.7858748106972082, "grad_norm": 1.4972506761550903, "learning_rate": 1.1549481886993363e-06, "loss": 0.719, "step": 22249 }, { "epoch": 0.7859101325009161, "grad_norm": 1.6753172874450684, "learning_rate": 1.1545825643766895e-06, "loss": 0.7423, "step": 22250 }, { "epoch": 0.785945454304624, "grad_norm": 1.5518182516098022, "learning_rate": 1.1542169903821437e-06, "loss": 0.7171, "step": 22251 }, { "epoch": 0.785980776108332, "grad_norm": 1.819353461265564, "learning_rate": 1.153851466720484e-06, "loss": 0.7683, "step": 22252 }, { "epoch": 0.7860160979120399, "grad_norm": 1.8515158891677856, "learning_rate": 1.1534859933964927e-06, "loss": 0.7886, "step": 22253 }, { "epoch": 0.7860514197157478, "grad_norm": 1.7009704113006592, "learning_rate": 1.1531205704149529e-06, "loss": 0.7441, "step": 22254 }, { "epoch": 0.7860867415194557, "grad_norm": 1.7593196630477905, "learning_rate": 1.1527551977806495e-06, "loss": 0.7685, "step": 22255 }, { "epoch": 0.7861220633231636, "grad_norm": 1.7380099296569824, "learning_rate": 1.1523898754983616e-06, "loss": 0.7813, "step": 22256 }, { "epoch": 0.7861573851268715, "grad_norm": 1.8382222652435303, "learning_rate": 1.152024603572871e-06, "loss": 0.814, "step": 22257 }, { "epoch": 0.7861927069305794, "grad_norm": 1.6059296131134033, "learning_rate": 1.15165938200896e-06, "loss": 0.7705, "step": 22258 }, { "epoch": 0.7862280287342873, "grad_norm": 1.7445334196090698, "learning_rate": 1.1512942108114083e-06, "loss": 0.7279, "step": 22259 }, { "epoch": 0.7862633505379952, "grad_norm": 1.693932294845581, "learning_rate": 1.1509290899849935e-06, "loss": 0.7654, "step": 22260 }, { "epoch": 0.7862986723417031, "grad_norm": 1.6603658199310303, "learning_rate": 1.1505640195344952e-06, "loss": 0.7446, "step": 22261 }, { "epoch": 0.786333994145411, "grad_norm": 2.0417535305023193, "learning_rate": 1.1501989994646927e-06, "loss": 0.8188, "step": 22262 }, { "epoch": 0.786369315949119, "grad_norm": 1.5851142406463623, "learning_rate": 1.1498340297803622e-06, "loss": 0.7197, "step": 22263 }, { "epoch": 0.7864046377528269, "grad_norm": 1.6059376001358032, "learning_rate": 1.1494691104862788e-06, "loss": 0.7723, "step": 22264 }, { "epoch": 0.7864399595565348, "grad_norm": 0.9224773645401001, "learning_rate": 1.149104241587221e-06, "loss": 0.5787, "step": 22265 }, { "epoch": 0.7864752813602427, "grad_norm": 1.8405990600585938, "learning_rate": 1.1487394230879617e-06, "loss": 0.7761, "step": 22266 }, { "epoch": 0.7865106031639506, "grad_norm": 1.7906156778335571, "learning_rate": 1.1483746549932766e-06, "loss": 0.7649, "step": 22267 }, { "epoch": 0.7865459249676585, "grad_norm": 1.7203021049499512, "learning_rate": 1.1480099373079407e-06, "loss": 0.7682, "step": 22268 }, { "epoch": 0.7865812467713664, "grad_norm": 1.797286033630371, "learning_rate": 1.1476452700367275e-06, "loss": 0.7807, "step": 22269 }, { "epoch": 0.7866165685750743, "grad_norm": 1.6812489032745361, "learning_rate": 1.1472806531844078e-06, "loss": 0.7981, "step": 22270 }, { "epoch": 0.7866518903787821, "grad_norm": 1.5506622791290283, "learning_rate": 1.1469160867557544e-06, "loss": 0.7423, "step": 22271 }, { "epoch": 0.78668721218249, "grad_norm": 1.7737401723861694, "learning_rate": 1.1465515707555403e-06, "loss": 0.7766, "step": 22272 }, { "epoch": 0.786722533986198, "grad_norm": 1.7339848279953003, "learning_rate": 1.146187105188533e-06, "loss": 0.7484, "step": 22273 }, { "epoch": 0.7867578557899059, "grad_norm": 1.9917055368423462, "learning_rate": 1.1458226900595054e-06, "loss": 0.7469, "step": 22274 }, { "epoch": 0.7867931775936138, "grad_norm": 1.6014623641967773, "learning_rate": 1.145458325373227e-06, "loss": 0.7649, "step": 22275 }, { "epoch": 0.7868284993973217, "grad_norm": 1.7063392400741577, "learning_rate": 1.1450940111344638e-06, "loss": 0.7994, "step": 22276 }, { "epoch": 0.7868638212010296, "grad_norm": 2.105257987976074, "learning_rate": 1.1447297473479857e-06, "loss": 0.7357, "step": 22277 }, { "epoch": 0.7868991430047375, "grad_norm": 1.7401515245437622, "learning_rate": 1.1443655340185616e-06, "loss": 0.7551, "step": 22278 }, { "epoch": 0.7869344648084454, "grad_norm": 1.6827497482299805, "learning_rate": 1.1440013711509546e-06, "loss": 0.7925, "step": 22279 }, { "epoch": 0.7869697866121533, "grad_norm": 0.9688250422477722, "learning_rate": 1.1436372587499333e-06, "loss": 0.5818, "step": 22280 }, { "epoch": 0.7870051084158612, "grad_norm": 1.7170807123184204, "learning_rate": 1.1432731968202643e-06, "loss": 0.767, "step": 22281 }, { "epoch": 0.7870404302195692, "grad_norm": 1.8023779392242432, "learning_rate": 1.14290918536671e-06, "loss": 0.8058, "step": 22282 }, { "epoch": 0.7870757520232771, "grad_norm": 1.6710363626480103, "learning_rate": 1.1425452243940344e-06, "loss": 0.7997, "step": 22283 }, { "epoch": 0.787111073826985, "grad_norm": 3.0228846073150635, "learning_rate": 1.1421813139070015e-06, "loss": 0.7707, "step": 22284 }, { "epoch": 0.7871463956306929, "grad_norm": 1.7938193082809448, "learning_rate": 1.141817453910376e-06, "loss": 0.7786, "step": 22285 }, { "epoch": 0.7871817174344008, "grad_norm": 4.5878214836120605, "learning_rate": 1.1414536444089164e-06, "loss": 0.7777, "step": 22286 }, { "epoch": 0.7872170392381087, "grad_norm": 1.7830523252487183, "learning_rate": 1.1410898854073864e-06, "loss": 0.7383, "step": 22287 }, { "epoch": 0.7872523610418166, "grad_norm": 1.655880331993103, "learning_rate": 1.1407261769105488e-06, "loss": 0.7589, "step": 22288 }, { "epoch": 0.7872876828455245, "grad_norm": 1.5852125883102417, "learning_rate": 1.1403625189231593e-06, "loss": 0.7497, "step": 22289 }, { "epoch": 0.7873230046492324, "grad_norm": 0.9339737892150879, "learning_rate": 1.13999891144998e-06, "loss": 0.582, "step": 22290 }, { "epoch": 0.7873583264529403, "grad_norm": 1.572062373161316, "learning_rate": 1.1396353544957705e-06, "loss": 0.7459, "step": 22291 }, { "epoch": 0.7873936482566483, "grad_norm": 2.7965147495269775, "learning_rate": 1.1392718480652865e-06, "loss": 0.7546, "step": 22292 }, { "epoch": 0.7874289700603562, "grad_norm": 1.6587824821472168, "learning_rate": 1.1389083921632865e-06, "loss": 0.7707, "step": 22293 }, { "epoch": 0.7874642918640641, "grad_norm": 1.7719672918319702, "learning_rate": 1.1385449867945297e-06, "loss": 0.7461, "step": 22294 }, { "epoch": 0.787499613667772, "grad_norm": 1.732414960861206, "learning_rate": 1.1381816319637684e-06, "loss": 0.8034, "step": 22295 }, { "epoch": 0.7875349354714799, "grad_norm": 1.7963030338287354, "learning_rate": 1.1378183276757597e-06, "loss": 0.7495, "step": 22296 }, { "epoch": 0.7875702572751878, "grad_norm": 1.6398485898971558, "learning_rate": 1.1374550739352608e-06, "loss": 0.7715, "step": 22297 }, { "epoch": 0.7876055790788956, "grad_norm": 1.9188977479934692, "learning_rate": 1.137091870747022e-06, "loss": 0.7809, "step": 22298 }, { "epoch": 0.7876409008826035, "grad_norm": 1.669323444366455, "learning_rate": 1.1367287181157988e-06, "loss": 0.7377, "step": 22299 }, { "epoch": 0.7876762226863114, "grad_norm": 1.512166142463684, "learning_rate": 1.1363656160463454e-06, "loss": 0.7376, "step": 22300 }, { "epoch": 0.7877115444900193, "grad_norm": 1.613655686378479, "learning_rate": 1.1360025645434125e-06, "loss": 0.7689, "step": 22301 }, { "epoch": 0.7877468662937273, "grad_norm": 1.9546440839767456, "learning_rate": 1.13563956361175e-06, "loss": 0.7574, "step": 22302 }, { "epoch": 0.7877821880974352, "grad_norm": 1.813679814338684, "learning_rate": 1.1352766132561105e-06, "loss": 0.7849, "step": 22303 }, { "epoch": 0.7878175099011431, "grad_norm": 1.5959665775299072, "learning_rate": 1.134913713481246e-06, "loss": 0.734, "step": 22304 }, { "epoch": 0.787852831704851, "grad_norm": 1.6288610696792603, "learning_rate": 1.1345508642919023e-06, "loss": 0.7551, "step": 22305 }, { "epoch": 0.7878881535085589, "grad_norm": 2.0735349655151367, "learning_rate": 1.1341880656928305e-06, "loss": 0.7653, "step": 22306 }, { "epoch": 0.7879234753122668, "grad_norm": 1.7829921245574951, "learning_rate": 1.1338253176887804e-06, "loss": 0.7673, "step": 22307 }, { "epoch": 0.7879587971159747, "grad_norm": 1.472158432006836, "learning_rate": 1.133462620284496e-06, "loss": 0.7326, "step": 22308 }, { "epoch": 0.7879941189196826, "grad_norm": 1.712690830230713, "learning_rate": 1.1330999734847258e-06, "loss": 0.7725, "step": 22309 }, { "epoch": 0.7880294407233905, "grad_norm": 1.6011286973953247, "learning_rate": 1.1327373772942179e-06, "loss": 0.764, "step": 22310 }, { "epoch": 0.7880647625270985, "grad_norm": 1.8445967435836792, "learning_rate": 1.1323748317177152e-06, "loss": 0.8221, "step": 22311 }, { "epoch": 0.7881000843308064, "grad_norm": 1.7891347408294678, "learning_rate": 1.1320123367599635e-06, "loss": 0.7651, "step": 22312 }, { "epoch": 0.7881354061345143, "grad_norm": 1.6943484544754028, "learning_rate": 1.131649892425708e-06, "loss": 0.758, "step": 22313 }, { "epoch": 0.7881707279382222, "grad_norm": 1.7833675146102905, "learning_rate": 1.131287498719691e-06, "loss": 0.7634, "step": 22314 }, { "epoch": 0.7882060497419301, "grad_norm": 1.6019930839538574, "learning_rate": 1.1309251556466556e-06, "loss": 0.7828, "step": 22315 }, { "epoch": 0.788241371545638, "grad_norm": 1.810807228088379, "learning_rate": 1.1305628632113441e-06, "loss": 0.7782, "step": 22316 }, { "epoch": 0.7882766933493459, "grad_norm": 1.6706669330596924, "learning_rate": 1.1302006214185002e-06, "loss": 0.7629, "step": 22317 }, { "epoch": 0.7883120151530538, "grad_norm": 1.8747678995132446, "learning_rate": 1.1298384302728615e-06, "loss": 0.7843, "step": 22318 }, { "epoch": 0.7883473369567617, "grad_norm": 1.6586835384368896, "learning_rate": 1.1294762897791716e-06, "loss": 0.7644, "step": 22319 }, { "epoch": 0.7883826587604696, "grad_norm": 1.8700461387634277, "learning_rate": 1.1291141999421684e-06, "loss": 0.7645, "step": 22320 }, { "epoch": 0.7884179805641776, "grad_norm": 1.6663402318954468, "learning_rate": 1.1287521607665896e-06, "loss": 0.7533, "step": 22321 }, { "epoch": 0.7884533023678855, "grad_norm": 1.8947532176971436, "learning_rate": 1.1283901722571749e-06, "loss": 0.7751, "step": 22322 }, { "epoch": 0.7884886241715934, "grad_norm": 1.8439087867736816, "learning_rate": 1.1280282344186627e-06, "loss": 0.7666, "step": 22323 }, { "epoch": 0.7885239459753012, "grad_norm": 1.70741868019104, "learning_rate": 1.127666347255788e-06, "loss": 0.778, "step": 22324 }, { "epoch": 0.7885592677790091, "grad_norm": 1.6697909832000732, "learning_rate": 1.1273045107732882e-06, "loss": 0.7548, "step": 22325 }, { "epoch": 0.788594589582717, "grad_norm": 1.7643141746520996, "learning_rate": 1.126942724975899e-06, "loss": 0.7709, "step": 22326 }, { "epoch": 0.7886299113864249, "grad_norm": 1.5969953536987305, "learning_rate": 1.1265809898683571e-06, "loss": 0.7539, "step": 22327 }, { "epoch": 0.7886652331901328, "grad_norm": 1.7809245586395264, "learning_rate": 1.1262193054553932e-06, "loss": 0.7445, "step": 22328 }, { "epoch": 0.7887005549938407, "grad_norm": 1.7531124353408813, "learning_rate": 1.125857671741743e-06, "loss": 0.7974, "step": 22329 }, { "epoch": 0.7887358767975486, "grad_norm": 1.8740991353988647, "learning_rate": 1.1254960887321408e-06, "loss": 0.7603, "step": 22330 }, { "epoch": 0.7887711986012566, "grad_norm": 1.5587775707244873, "learning_rate": 1.125134556431316e-06, "loss": 0.7502, "step": 22331 }, { "epoch": 0.7888065204049645, "grad_norm": 1.6114144325256348, "learning_rate": 1.1247730748440022e-06, "loss": 0.7712, "step": 22332 }, { "epoch": 0.7888418422086724, "grad_norm": 1.5840868949890137, "learning_rate": 1.1244116439749309e-06, "loss": 0.7622, "step": 22333 }, { "epoch": 0.7888771640123803, "grad_norm": 1.6515557765960693, "learning_rate": 1.1240502638288298e-06, "loss": 0.749, "step": 22334 }, { "epoch": 0.7889124858160882, "grad_norm": 1.5204075574874878, "learning_rate": 1.1236889344104307e-06, "loss": 0.7449, "step": 22335 }, { "epoch": 0.7889478076197961, "grad_norm": 1.978793740272522, "learning_rate": 1.1233276557244632e-06, "loss": 0.7382, "step": 22336 }, { "epoch": 0.788983129423504, "grad_norm": 1.6039611101150513, "learning_rate": 1.1229664277756547e-06, "loss": 0.7217, "step": 22337 }, { "epoch": 0.7890184512272119, "grad_norm": 1.5560823678970337, "learning_rate": 1.1226052505687312e-06, "loss": 0.7629, "step": 22338 }, { "epoch": 0.7890537730309198, "grad_norm": 1.6808621883392334, "learning_rate": 1.122244124108422e-06, "loss": 0.7499, "step": 22339 }, { "epoch": 0.7890890948346277, "grad_norm": 1.6266385316848755, "learning_rate": 1.1218830483994537e-06, "loss": 0.7744, "step": 22340 }, { "epoch": 0.7891244166383357, "grad_norm": 1.7540169954299927, "learning_rate": 1.1215220234465496e-06, "loss": 0.793, "step": 22341 }, { "epoch": 0.7891597384420436, "grad_norm": 1.6599392890930176, "learning_rate": 1.1211610492544362e-06, "loss": 0.7704, "step": 22342 }, { "epoch": 0.7891950602457515, "grad_norm": 2.2646098136901855, "learning_rate": 1.1208001258278395e-06, "loss": 0.7458, "step": 22343 }, { "epoch": 0.7892303820494594, "grad_norm": 1.7294435501098633, "learning_rate": 1.1204392531714798e-06, "loss": 0.7418, "step": 22344 }, { "epoch": 0.7892657038531673, "grad_norm": 1.6458756923675537, "learning_rate": 1.120078431290082e-06, "loss": 0.7681, "step": 22345 }, { "epoch": 0.7893010256568752, "grad_norm": 2.2436866760253906, "learning_rate": 1.1197176601883697e-06, "loss": 0.7515, "step": 22346 }, { "epoch": 0.7893363474605831, "grad_norm": 1.7238963842391968, "learning_rate": 1.1193569398710618e-06, "loss": 0.7678, "step": 22347 }, { "epoch": 0.789371669264291, "grad_norm": 1.8783369064331055, "learning_rate": 1.1189962703428813e-06, "loss": 0.7823, "step": 22348 }, { "epoch": 0.7894069910679989, "grad_norm": 1.692102074623108, "learning_rate": 1.118635651608549e-06, "loss": 0.7857, "step": 22349 }, { "epoch": 0.7894423128717067, "grad_norm": 1.5898168087005615, "learning_rate": 1.1182750836727823e-06, "loss": 0.737, "step": 22350 }, { "epoch": 0.7894776346754147, "grad_norm": 1.725768804550171, "learning_rate": 1.1179145665403018e-06, "loss": 0.7659, "step": 22351 }, { "epoch": 0.7895129564791226, "grad_norm": 1.647416114807129, "learning_rate": 1.1175541002158268e-06, "loss": 0.7324, "step": 22352 }, { "epoch": 0.7895482782828305, "grad_norm": 1.5758776664733887, "learning_rate": 1.1171936847040727e-06, "loss": 0.7506, "step": 22353 }, { "epoch": 0.7895836000865384, "grad_norm": 1.6134835481643677, "learning_rate": 1.116833320009758e-06, "loss": 0.7442, "step": 22354 }, { "epoch": 0.7896189218902463, "grad_norm": 1.5740766525268555, "learning_rate": 1.1164730061376e-06, "loss": 0.7313, "step": 22355 }, { "epoch": 0.7896542436939542, "grad_norm": 1.651450514793396, "learning_rate": 1.1161127430923136e-06, "loss": 0.754, "step": 22356 }, { "epoch": 0.7896895654976621, "grad_norm": 1.85276198387146, "learning_rate": 1.1157525308786115e-06, "loss": 0.7782, "step": 22357 }, { "epoch": 0.78972488730137, "grad_norm": 1.8386684656143188, "learning_rate": 1.1153923695012103e-06, "loss": 0.7541, "step": 22358 }, { "epoch": 0.7897602091050779, "grad_norm": 1.9750882387161255, "learning_rate": 1.1150322589648254e-06, "loss": 0.7752, "step": 22359 }, { "epoch": 0.7897955309087858, "grad_norm": 1.7627936601638794, "learning_rate": 1.114672199274166e-06, "loss": 0.7362, "step": 22360 }, { "epoch": 0.7898308527124938, "grad_norm": 1.604962706565857, "learning_rate": 1.114312190433947e-06, "loss": 0.7594, "step": 22361 }, { "epoch": 0.7898661745162017, "grad_norm": 1.6932532787322998, "learning_rate": 1.1139522324488804e-06, "loss": 0.7445, "step": 22362 }, { "epoch": 0.7899014963199096, "grad_norm": 1.5813076496124268, "learning_rate": 1.1135923253236759e-06, "loss": 0.7922, "step": 22363 }, { "epoch": 0.7899368181236175, "grad_norm": 1.9406721591949463, "learning_rate": 1.1132324690630442e-06, "loss": 0.7569, "step": 22364 }, { "epoch": 0.7899721399273254, "grad_norm": 1.792734146118164, "learning_rate": 1.1128726636716963e-06, "loss": 0.7918, "step": 22365 }, { "epoch": 0.7900074617310333, "grad_norm": 1.7747036218643188, "learning_rate": 1.112512909154339e-06, "loss": 0.777, "step": 22366 }, { "epoch": 0.7900427835347412, "grad_norm": 1.6458712816238403, "learning_rate": 1.1121532055156824e-06, "loss": 0.7203, "step": 22367 }, { "epoch": 0.7900781053384491, "grad_norm": 1.6972182989120483, "learning_rate": 1.111793552760435e-06, "loss": 0.7769, "step": 22368 }, { "epoch": 0.790113427142157, "grad_norm": 0.8354344367980957, "learning_rate": 1.1114339508933008e-06, "loss": 0.5552, "step": 22369 }, { "epoch": 0.790148748945865, "grad_norm": 1.7128750085830688, "learning_rate": 1.1110743999189889e-06, "loss": 0.7733, "step": 22370 }, { "epoch": 0.7901840707495729, "grad_norm": 1.6599658727645874, "learning_rate": 1.1107148998422052e-06, "loss": 0.7719, "step": 22371 }, { "epoch": 0.7902193925532808, "grad_norm": 1.7676717042922974, "learning_rate": 1.1103554506676523e-06, "loss": 0.7491, "step": 22372 }, { "epoch": 0.7902547143569887, "grad_norm": 1.7408709526062012, "learning_rate": 1.1099960524000363e-06, "loss": 0.7537, "step": 22373 }, { "epoch": 0.7902900361606966, "grad_norm": 2.0514566898345947, "learning_rate": 1.109636705044062e-06, "loss": 0.8069, "step": 22374 }, { "epoch": 0.7903253579644045, "grad_norm": 2.040311813354492, "learning_rate": 1.1092774086044312e-06, "loss": 0.7619, "step": 22375 }, { "epoch": 0.7903606797681123, "grad_norm": 1.995232343673706, "learning_rate": 1.1089181630858447e-06, "loss": 0.743, "step": 22376 }, { "epoch": 0.7903960015718202, "grad_norm": 2.0187971591949463, "learning_rate": 1.1085589684930065e-06, "loss": 0.7205, "step": 22377 }, { "epoch": 0.7904313233755281, "grad_norm": 1.6404023170471191, "learning_rate": 1.1081998248306175e-06, "loss": 0.7895, "step": 22378 }, { "epoch": 0.790466645179236, "grad_norm": 1.7994236946105957, "learning_rate": 1.107840732103377e-06, "loss": 0.782, "step": 22379 }, { "epoch": 0.790501966982944, "grad_norm": 1.8116272687911987, "learning_rate": 1.1074816903159846e-06, "loss": 0.7324, "step": 22380 }, { "epoch": 0.7905372887866519, "grad_norm": 1.6514582633972168, "learning_rate": 1.107122699473142e-06, "loss": 0.7251, "step": 22381 }, { "epoch": 0.7905726105903598, "grad_norm": 2.5659573078155518, "learning_rate": 1.1067637595795445e-06, "loss": 0.7669, "step": 22382 }, { "epoch": 0.7906079323940677, "grad_norm": 1.8659429550170898, "learning_rate": 1.106404870639891e-06, "loss": 0.7716, "step": 22383 }, { "epoch": 0.7906432541977756, "grad_norm": 1.560089111328125, "learning_rate": 1.1060460326588785e-06, "loss": 0.7519, "step": 22384 }, { "epoch": 0.7906785760014835, "grad_norm": 1.6866252422332764, "learning_rate": 1.1056872456412048e-06, "loss": 0.7874, "step": 22385 }, { "epoch": 0.7907138978051914, "grad_norm": 1.7806013822555542, "learning_rate": 1.1053285095915634e-06, "loss": 0.8, "step": 22386 }, { "epoch": 0.7907492196088993, "grad_norm": 2.2794864177703857, "learning_rate": 1.1049698245146496e-06, "loss": 0.7897, "step": 22387 }, { "epoch": 0.7907845414126072, "grad_norm": 2.1700239181518555, "learning_rate": 1.1046111904151608e-06, "loss": 0.7811, "step": 22388 }, { "epoch": 0.7908198632163151, "grad_norm": 1.8074127435684204, "learning_rate": 1.1042526072977866e-06, "loss": 0.8015, "step": 22389 }, { "epoch": 0.790855185020023, "grad_norm": 1.6895064115524292, "learning_rate": 1.1038940751672216e-06, "loss": 0.7826, "step": 22390 }, { "epoch": 0.790890506823731, "grad_norm": 1.824605941772461, "learning_rate": 1.1035355940281605e-06, "loss": 0.7779, "step": 22391 }, { "epoch": 0.7909258286274389, "grad_norm": 1.914122223854065, "learning_rate": 1.1031771638852917e-06, "loss": 0.7702, "step": 22392 }, { "epoch": 0.7909611504311468, "grad_norm": 1.797860026359558, "learning_rate": 1.1028187847433091e-06, "loss": 0.7811, "step": 22393 }, { "epoch": 0.7909964722348547, "grad_norm": 1.7192447185516357, "learning_rate": 1.1024604566069003e-06, "loss": 0.7896, "step": 22394 }, { "epoch": 0.7910317940385626, "grad_norm": 1.6967041492462158, "learning_rate": 1.1021021794807574e-06, "loss": 0.7726, "step": 22395 }, { "epoch": 0.7910671158422705, "grad_norm": 1.647121548652649, "learning_rate": 1.1017439533695673e-06, "loss": 0.7304, "step": 22396 }, { "epoch": 0.7911024376459784, "grad_norm": 1.777038335800171, "learning_rate": 1.1013857782780195e-06, "loss": 0.7298, "step": 22397 }, { "epoch": 0.7911377594496863, "grad_norm": 1.6459828615188599, "learning_rate": 1.1010276542108029e-06, "loss": 0.7251, "step": 22398 }, { "epoch": 0.7911730812533943, "grad_norm": 1.7374228239059448, "learning_rate": 1.1006695811726026e-06, "loss": 0.7765, "step": 22399 }, { "epoch": 0.7912084030571022, "grad_norm": 1.6227166652679443, "learning_rate": 1.100311559168105e-06, "loss": 0.7779, "step": 22400 }, { "epoch": 0.7912437248608101, "grad_norm": 1.6746726036071777, "learning_rate": 1.099953588201999e-06, "loss": 0.7715, "step": 22401 }, { "epoch": 0.7912790466645179, "grad_norm": 1.5362120866775513, "learning_rate": 1.0995956682789654e-06, "loss": 0.7355, "step": 22402 }, { "epoch": 0.7913143684682258, "grad_norm": 1.684648871421814, "learning_rate": 1.0992377994036906e-06, "loss": 0.7535, "step": 22403 }, { "epoch": 0.7913496902719337, "grad_norm": 1.812700867652893, "learning_rate": 1.09887998158086e-06, "loss": 0.7406, "step": 22404 }, { "epoch": 0.7913850120756416, "grad_norm": 2.731807231903076, "learning_rate": 1.0985222148151526e-06, "loss": 0.7452, "step": 22405 }, { "epoch": 0.7914203338793495, "grad_norm": 1.7086879014968872, "learning_rate": 1.0981644991112533e-06, "loss": 0.7818, "step": 22406 }, { "epoch": 0.7914556556830574, "grad_norm": 2.369123935699463, "learning_rate": 1.0978068344738458e-06, "loss": 0.7447, "step": 22407 }, { "epoch": 0.7914909774867653, "grad_norm": 4.827022075653076, "learning_rate": 1.0974492209076065e-06, "loss": 0.7472, "step": 22408 }, { "epoch": 0.7915262992904732, "grad_norm": 1.6993048191070557, "learning_rate": 1.0970916584172186e-06, "loss": 0.7645, "step": 22409 }, { "epoch": 0.7915616210941812, "grad_norm": 1.6901836395263672, "learning_rate": 1.0967341470073627e-06, "loss": 0.7892, "step": 22410 }, { "epoch": 0.7915969428978891, "grad_norm": 1.8917112350463867, "learning_rate": 1.096376686682717e-06, "loss": 0.7667, "step": 22411 }, { "epoch": 0.791632264701597, "grad_norm": 1.6552623510360718, "learning_rate": 1.0960192774479577e-06, "loss": 0.7846, "step": 22412 }, { "epoch": 0.7916675865053049, "grad_norm": 2.2570674419403076, "learning_rate": 1.095661919307764e-06, "loss": 0.7601, "step": 22413 }, { "epoch": 0.7917029083090128, "grad_norm": 1.804551601409912, "learning_rate": 1.0953046122668149e-06, "loss": 0.7554, "step": 22414 }, { "epoch": 0.7917382301127207, "grad_norm": 2.008239984512329, "learning_rate": 1.0949473563297835e-06, "loss": 0.7684, "step": 22415 }, { "epoch": 0.7917735519164286, "grad_norm": 1.740776777267456, "learning_rate": 1.0945901515013468e-06, "loss": 0.7896, "step": 22416 }, { "epoch": 0.7918088737201365, "grad_norm": 1.8253988027572632, "learning_rate": 1.094232997786182e-06, "loss": 0.7588, "step": 22417 }, { "epoch": 0.7918441955238444, "grad_norm": 1.7211376428604126, "learning_rate": 1.09387589518896e-06, "loss": 0.7102, "step": 22418 }, { "epoch": 0.7918795173275524, "grad_norm": 2.272554636001587, "learning_rate": 1.0935188437143557e-06, "loss": 0.7619, "step": 22419 }, { "epoch": 0.7919148391312603, "grad_norm": 1.5030969381332397, "learning_rate": 1.0931618433670439e-06, "loss": 0.7634, "step": 22420 }, { "epoch": 0.7919501609349682, "grad_norm": 1.7267721891403198, "learning_rate": 1.0928048941516944e-06, "loss": 0.8137, "step": 22421 }, { "epoch": 0.7919854827386761, "grad_norm": 0.9475700855255127, "learning_rate": 1.0924479960729795e-06, "loss": 0.5503, "step": 22422 }, { "epoch": 0.792020804542384, "grad_norm": 1.5155084133148193, "learning_rate": 1.0920911491355724e-06, "loss": 0.7464, "step": 22423 }, { "epoch": 0.7920561263460919, "grad_norm": 1.6968122720718384, "learning_rate": 1.0917343533441404e-06, "loss": 0.7371, "step": 22424 }, { "epoch": 0.7920914481497998, "grad_norm": 1.6493237018585205, "learning_rate": 1.0913776087033545e-06, "loss": 0.7828, "step": 22425 }, { "epoch": 0.7921267699535077, "grad_norm": 1.7626744508743286, "learning_rate": 1.0910209152178846e-06, "loss": 0.7858, "step": 22426 }, { "epoch": 0.7921620917572156, "grad_norm": 1.7802928686141968, "learning_rate": 1.0906642728923973e-06, "loss": 0.7687, "step": 22427 }, { "epoch": 0.7921974135609234, "grad_norm": 2.106762647628784, "learning_rate": 1.0903076817315612e-06, "loss": 0.7739, "step": 22428 }, { "epoch": 0.7922327353646313, "grad_norm": 1.8446928262710571, "learning_rate": 1.0899511417400443e-06, "loss": 0.7991, "step": 22429 }, { "epoch": 0.7922680571683393, "grad_norm": 1.7427420616149902, "learning_rate": 1.0895946529225116e-06, "loss": 0.7662, "step": 22430 }, { "epoch": 0.7923033789720472, "grad_norm": 1.803242564201355, "learning_rate": 1.0892382152836285e-06, "loss": 0.7881, "step": 22431 }, { "epoch": 0.7923387007757551, "grad_norm": 1.8317748308181763, "learning_rate": 1.0888818288280594e-06, "loss": 0.7789, "step": 22432 }, { "epoch": 0.792374022579463, "grad_norm": 1.694861888885498, "learning_rate": 1.0885254935604716e-06, "loss": 0.769, "step": 22433 }, { "epoch": 0.7924093443831709, "grad_norm": 1.9846136569976807, "learning_rate": 1.0881692094855257e-06, "loss": 0.7462, "step": 22434 }, { "epoch": 0.7924446661868788, "grad_norm": 2.3894176483154297, "learning_rate": 1.0878129766078855e-06, "loss": 0.7452, "step": 22435 }, { "epoch": 0.7924799879905867, "grad_norm": 1.8795572519302368, "learning_rate": 1.0874567949322152e-06, "loss": 0.7796, "step": 22436 }, { "epoch": 0.7925153097942946, "grad_norm": 1.844994068145752, "learning_rate": 1.0871006644631737e-06, "loss": 0.7434, "step": 22437 }, { "epoch": 0.7925506315980025, "grad_norm": 2.059875965118408, "learning_rate": 1.0867445852054232e-06, "loss": 0.7713, "step": 22438 }, { "epoch": 0.7925859534017105, "grad_norm": 1.7085926532745361, "learning_rate": 1.0863885571636251e-06, "loss": 0.7783, "step": 22439 }, { "epoch": 0.7926212752054184, "grad_norm": 1.6312761306762695, "learning_rate": 1.0860325803424364e-06, "loss": 0.749, "step": 22440 }, { "epoch": 0.7926565970091263, "grad_norm": 1.741409420967102, "learning_rate": 1.0856766547465175e-06, "loss": 0.7439, "step": 22441 }, { "epoch": 0.7926919188128342, "grad_norm": 2.005950689315796, "learning_rate": 1.0853207803805272e-06, "loss": 0.7981, "step": 22442 }, { "epoch": 0.7927272406165421, "grad_norm": 1.7421939373016357, "learning_rate": 1.0849649572491232e-06, "loss": 0.7468, "step": 22443 }, { "epoch": 0.79276256242025, "grad_norm": 1.6736156940460205, "learning_rate": 1.0846091853569612e-06, "loss": 0.755, "step": 22444 }, { "epoch": 0.7927978842239579, "grad_norm": 1.732305884361267, "learning_rate": 1.0842534647086984e-06, "loss": 0.7721, "step": 22445 }, { "epoch": 0.7928332060276658, "grad_norm": 1.7459471225738525, "learning_rate": 1.0838977953089907e-06, "loss": 0.7876, "step": 22446 }, { "epoch": 0.7928685278313737, "grad_norm": 1.8517980575561523, "learning_rate": 1.0835421771624915e-06, "loss": 0.7762, "step": 22447 }, { "epoch": 0.7929038496350816, "grad_norm": 1.7865910530090332, "learning_rate": 1.0831866102738576e-06, "loss": 0.8121, "step": 22448 }, { "epoch": 0.7929391714387896, "grad_norm": 1.7132656574249268, "learning_rate": 1.082831094647741e-06, "loss": 0.7226, "step": 22449 }, { "epoch": 0.7929744932424975, "grad_norm": 1.756851315498352, "learning_rate": 1.0824756302887935e-06, "loss": 0.7625, "step": 22450 }, { "epoch": 0.7930098150462054, "grad_norm": 1.6704291105270386, "learning_rate": 1.0821202172016681e-06, "loss": 0.7448, "step": 22451 }, { "epoch": 0.7930451368499133, "grad_norm": 1.6580220460891724, "learning_rate": 1.0817648553910171e-06, "loss": 0.7973, "step": 22452 }, { "epoch": 0.7930804586536212, "grad_norm": 1.7656594514846802, "learning_rate": 1.0814095448614925e-06, "loss": 0.7633, "step": 22453 }, { "epoch": 0.793115780457329, "grad_norm": 1.6899166107177734, "learning_rate": 1.081054285617742e-06, "loss": 0.7299, "step": 22454 }, { "epoch": 0.7931511022610369, "grad_norm": 1.7101622819900513, "learning_rate": 1.080699077664416e-06, "loss": 0.7702, "step": 22455 }, { "epoch": 0.7931864240647448, "grad_norm": 1.7301828861236572, "learning_rate": 1.080343921006165e-06, "loss": 0.73, "step": 22456 }, { "epoch": 0.7932217458684527, "grad_norm": 1.6583812236785889, "learning_rate": 1.0799888156476346e-06, "loss": 0.7779, "step": 22457 }, { "epoch": 0.7932570676721606, "grad_norm": 1.8552207946777344, "learning_rate": 1.0796337615934738e-06, "loss": 0.7842, "step": 22458 }, { "epoch": 0.7932923894758686, "grad_norm": 2.090569496154785, "learning_rate": 1.079278758848331e-06, "loss": 0.8146, "step": 22459 }, { "epoch": 0.7933277112795765, "grad_norm": 1.6747409105300903, "learning_rate": 1.078923807416849e-06, "loss": 0.7792, "step": 22460 }, { "epoch": 0.7933630330832844, "grad_norm": 1.6142668724060059, "learning_rate": 1.0785689073036754e-06, "loss": 0.8016, "step": 22461 }, { "epoch": 0.7933983548869923, "grad_norm": 1.9185996055603027, "learning_rate": 1.078214058513456e-06, "loss": 0.7986, "step": 22462 }, { "epoch": 0.7934336766907002, "grad_norm": 3.245866298675537, "learning_rate": 1.0778592610508326e-06, "loss": 0.7657, "step": 22463 }, { "epoch": 0.7934689984944081, "grad_norm": 1.8704404830932617, "learning_rate": 1.0775045149204499e-06, "loss": 0.7945, "step": 22464 }, { "epoch": 0.793504320298116, "grad_norm": 1.7245397567749023, "learning_rate": 1.077149820126952e-06, "loss": 0.7746, "step": 22465 }, { "epoch": 0.7935396421018239, "grad_norm": 1.6447399854660034, "learning_rate": 1.0767951766749784e-06, "loss": 0.7517, "step": 22466 }, { "epoch": 0.7935749639055318, "grad_norm": 1.668777585029602, "learning_rate": 1.076440584569174e-06, "loss": 0.7732, "step": 22467 }, { "epoch": 0.7936102857092397, "grad_norm": 1.655733585357666, "learning_rate": 1.0760860438141757e-06, "loss": 0.7519, "step": 22468 }, { "epoch": 0.7936456075129477, "grad_norm": 1.7318414449691772, "learning_rate": 1.075731554414627e-06, "loss": 0.7785, "step": 22469 }, { "epoch": 0.7936809293166556, "grad_norm": 1.793986439704895, "learning_rate": 1.0753771163751648e-06, "loss": 0.7401, "step": 22470 }, { "epoch": 0.7937162511203635, "grad_norm": 1.622410535812378, "learning_rate": 1.075022729700429e-06, "loss": 0.7664, "step": 22471 }, { "epoch": 0.7937515729240714, "grad_norm": 2.018244504928589, "learning_rate": 1.0746683943950597e-06, "loss": 0.7456, "step": 22472 }, { "epoch": 0.7937868947277793, "grad_norm": 1.6025151014328003, "learning_rate": 1.0743141104636912e-06, "loss": 0.7744, "step": 22473 }, { "epoch": 0.7938222165314872, "grad_norm": 1.75470769405365, "learning_rate": 1.0739598779109611e-06, "loss": 0.7724, "step": 22474 }, { "epoch": 0.7938575383351951, "grad_norm": 1.627846598625183, "learning_rate": 1.0736056967415076e-06, "loss": 0.7738, "step": 22475 }, { "epoch": 0.793892860138903, "grad_norm": 1.8013266324996948, "learning_rate": 1.0732515669599636e-06, "loss": 0.7659, "step": 22476 }, { "epoch": 0.7939281819426109, "grad_norm": 1.7120416164398193, "learning_rate": 1.0728974885709647e-06, "loss": 0.7718, "step": 22477 }, { "epoch": 0.7939635037463189, "grad_norm": 4.010523319244385, "learning_rate": 1.0725434615791468e-06, "loss": 0.806, "step": 22478 }, { "epoch": 0.7939988255500268, "grad_norm": 1.868560552597046, "learning_rate": 1.0721894859891407e-06, "loss": 0.7675, "step": 22479 }, { "epoch": 0.7940341473537346, "grad_norm": 1.790671467781067, "learning_rate": 1.0718355618055798e-06, "loss": 0.7709, "step": 22480 }, { "epoch": 0.7940694691574425, "grad_norm": 0.9837051630020142, "learning_rate": 1.0714816890330982e-06, "loss": 0.5985, "step": 22481 }, { "epoch": 0.7941047909611504, "grad_norm": 1.8788905143737793, "learning_rate": 1.0711278676763242e-06, "loss": 0.7593, "step": 22482 }, { "epoch": 0.7941401127648583, "grad_norm": 1.755493402481079, "learning_rate": 1.0707740977398906e-06, "loss": 0.772, "step": 22483 }, { "epoch": 0.7941754345685662, "grad_norm": 1.8642609119415283, "learning_rate": 1.070420379228428e-06, "loss": 0.7748, "step": 22484 }, { "epoch": 0.7942107563722741, "grad_norm": 2.196765661239624, "learning_rate": 1.0700667121465647e-06, "loss": 0.8018, "step": 22485 }, { "epoch": 0.794246078175982, "grad_norm": 1.6182961463928223, "learning_rate": 1.0697130964989282e-06, "loss": 0.7523, "step": 22486 }, { "epoch": 0.7942813999796899, "grad_norm": 1.8466252088546753, "learning_rate": 1.0693595322901484e-06, "loss": 0.8, "step": 22487 }, { "epoch": 0.7943167217833978, "grad_norm": 2.4484667778015137, "learning_rate": 1.0690060195248531e-06, "loss": 0.7642, "step": 22488 }, { "epoch": 0.7943520435871058, "grad_norm": 1.661708116531372, "learning_rate": 1.0686525582076663e-06, "loss": 0.7862, "step": 22489 }, { "epoch": 0.7943873653908137, "grad_norm": 1.9956332445144653, "learning_rate": 1.0682991483432164e-06, "loss": 0.7602, "step": 22490 }, { "epoch": 0.7944226871945216, "grad_norm": 1.8983659744262695, "learning_rate": 1.0679457899361295e-06, "loss": 0.7663, "step": 22491 }, { "epoch": 0.7944580089982295, "grad_norm": 1.7237805128097534, "learning_rate": 1.0675924829910273e-06, "loss": 0.7824, "step": 22492 }, { "epoch": 0.7944933308019374, "grad_norm": 1.6339291334152222, "learning_rate": 1.0672392275125354e-06, "loss": 0.7478, "step": 22493 }, { "epoch": 0.7945286526056453, "grad_norm": 1.9838685989379883, "learning_rate": 1.0668860235052792e-06, "loss": 0.7789, "step": 22494 }, { "epoch": 0.7945639744093532, "grad_norm": 1.7358496189117432, "learning_rate": 1.0665328709738775e-06, "loss": 0.7823, "step": 22495 }, { "epoch": 0.7945992962130611, "grad_norm": 1.8842413425445557, "learning_rate": 1.0661797699229542e-06, "loss": 0.7659, "step": 22496 }, { "epoch": 0.794634618016769, "grad_norm": 1.7295939922332764, "learning_rate": 1.0658267203571321e-06, "loss": 0.7781, "step": 22497 }, { "epoch": 0.794669939820477, "grad_norm": 0.9356258511543274, "learning_rate": 1.065473722281029e-06, "loss": 0.5607, "step": 22498 }, { "epoch": 0.7947052616241849, "grad_norm": 1.7260156869888306, "learning_rate": 1.065120775699266e-06, "loss": 0.7462, "step": 22499 }, { "epoch": 0.7947405834278928, "grad_norm": 2.0567867755889893, "learning_rate": 1.064767880616463e-06, "loss": 0.7615, "step": 22500 }, { "epoch": 0.7947759052316007, "grad_norm": 1.835986852645874, "learning_rate": 1.0644150370372391e-06, "loss": 0.7645, "step": 22501 }, { "epoch": 0.7948112270353086, "grad_norm": 1.520999550819397, "learning_rate": 1.0640622449662102e-06, "loss": 0.7291, "step": 22502 }, { "epoch": 0.7948465488390165, "grad_norm": 1.8143527507781982, "learning_rate": 1.0637095044079948e-06, "loss": 0.7561, "step": 22503 }, { "epoch": 0.7948818706427244, "grad_norm": 1.8293038606643677, "learning_rate": 1.0633568153672125e-06, "loss": 0.7698, "step": 22504 }, { "epoch": 0.7949171924464323, "grad_norm": 1.6764696836471558, "learning_rate": 1.063004177848473e-06, "loss": 0.7531, "step": 22505 }, { "epoch": 0.7949525142501401, "grad_norm": 1.9126182794570923, "learning_rate": 1.062651591856395e-06, "loss": 0.7664, "step": 22506 }, { "epoch": 0.794987836053848, "grad_norm": 1.62795090675354, "learning_rate": 1.0622990573955939e-06, "loss": 0.7631, "step": 22507 }, { "epoch": 0.795023157857556, "grad_norm": 1.7307960987091064, "learning_rate": 1.0619465744706813e-06, "loss": 0.7418, "step": 22508 }, { "epoch": 0.7950584796612639, "grad_norm": 1.6563314199447632, "learning_rate": 1.061594143086272e-06, "loss": 0.7875, "step": 22509 }, { "epoch": 0.7950938014649718, "grad_norm": 1.628409743309021, "learning_rate": 1.061241763246978e-06, "loss": 0.7637, "step": 22510 }, { "epoch": 0.7951291232686797, "grad_norm": 2.40836501121521, "learning_rate": 1.0608894349574129e-06, "loss": 0.7904, "step": 22511 }, { "epoch": 0.7951644450723876, "grad_norm": 1.7172839641571045, "learning_rate": 1.0605371582221845e-06, "loss": 0.7871, "step": 22512 }, { "epoch": 0.7951997668760955, "grad_norm": 1.8215246200561523, "learning_rate": 1.0601849330459057e-06, "loss": 0.81, "step": 22513 }, { "epoch": 0.7952350886798034, "grad_norm": 2.0444891452789307, "learning_rate": 1.059832759433187e-06, "loss": 0.7892, "step": 22514 }, { "epoch": 0.7952704104835113, "grad_norm": 1.4947680234909058, "learning_rate": 1.0594806373886352e-06, "loss": 0.7602, "step": 22515 }, { "epoch": 0.7953057322872192, "grad_norm": 1.7944813966751099, "learning_rate": 1.0591285669168605e-06, "loss": 0.779, "step": 22516 }, { "epoch": 0.7953410540909271, "grad_norm": 1.6329545974731445, "learning_rate": 1.058776548022471e-06, "loss": 0.7789, "step": 22517 }, { "epoch": 0.7953763758946351, "grad_norm": 1.6513475179672241, "learning_rate": 1.0584245807100723e-06, "loss": 0.7487, "step": 22518 }, { "epoch": 0.795411697698343, "grad_norm": 2.0659995079040527, "learning_rate": 1.0580726649842716e-06, "loss": 0.7826, "step": 22519 }, { "epoch": 0.7954470195020509, "grad_norm": 1.747198224067688, "learning_rate": 1.0577208008496764e-06, "loss": 0.7652, "step": 22520 }, { "epoch": 0.7954823413057588, "grad_norm": 0.950700044631958, "learning_rate": 1.0573689883108884e-06, "loss": 0.5473, "step": 22521 }, { "epoch": 0.7955176631094667, "grad_norm": 1.7164310216903687, "learning_rate": 1.057017227372516e-06, "loss": 0.7702, "step": 22522 }, { "epoch": 0.7955529849131746, "grad_norm": 1.6773152351379395, "learning_rate": 1.0566655180391588e-06, "loss": 0.7545, "step": 22523 }, { "epoch": 0.7955883067168825, "grad_norm": 2.129918098449707, "learning_rate": 1.0563138603154239e-06, "loss": 0.7435, "step": 22524 }, { "epoch": 0.7956236285205904, "grad_norm": 1.6088578701019287, "learning_rate": 1.0559622542059105e-06, "loss": 0.7661, "step": 22525 }, { "epoch": 0.7956589503242983, "grad_norm": 1.7142869234085083, "learning_rate": 1.0556106997152216e-06, "loss": 0.7713, "step": 22526 }, { "epoch": 0.7956942721280063, "grad_norm": 1.7325330972671509, "learning_rate": 1.0552591968479597e-06, "loss": 0.7858, "step": 22527 }, { "epoch": 0.7957295939317142, "grad_norm": 1.9575386047363281, "learning_rate": 1.0549077456087226e-06, "loss": 0.787, "step": 22528 }, { "epoch": 0.7957649157354221, "grad_norm": 1.7552069425582886, "learning_rate": 1.0545563460021113e-06, "loss": 0.7551, "step": 22529 }, { "epoch": 0.79580023753913, "grad_norm": 2.8226158618927, "learning_rate": 1.0542049980327263e-06, "loss": 0.8013, "step": 22530 }, { "epoch": 0.7958355593428379, "grad_norm": 1.6702345609664917, "learning_rate": 1.053853701705163e-06, "loss": 0.7462, "step": 22531 }, { "epoch": 0.7958708811465457, "grad_norm": 1.766957402229309, "learning_rate": 1.053502457024021e-06, "loss": 0.789, "step": 22532 }, { "epoch": 0.7959062029502536, "grad_norm": 2.390561819076538, "learning_rate": 1.0531512639938984e-06, "loss": 0.7324, "step": 22533 }, { "epoch": 0.7959415247539615, "grad_norm": 2.1164627075195312, "learning_rate": 1.0528001226193885e-06, "loss": 0.7829, "step": 22534 }, { "epoch": 0.7959768465576694, "grad_norm": 1.7777597904205322, "learning_rate": 1.0524490329050884e-06, "loss": 0.7725, "step": 22535 }, { "epoch": 0.7960121683613773, "grad_norm": 1.6831403970718384, "learning_rate": 1.0520979948555954e-06, "loss": 0.7792, "step": 22536 }, { "epoch": 0.7960474901650852, "grad_norm": 1.789521336555481, "learning_rate": 1.0517470084755e-06, "loss": 0.7663, "step": 22537 }, { "epoch": 0.7960828119687932, "grad_norm": 1.6493864059448242, "learning_rate": 1.0513960737693973e-06, "loss": 0.754, "step": 22538 }, { "epoch": 0.7961181337725011, "grad_norm": 1.772910237312317, "learning_rate": 1.0510451907418816e-06, "loss": 0.7434, "step": 22539 }, { "epoch": 0.796153455576209, "grad_norm": 1.8171348571777344, "learning_rate": 1.0506943593975449e-06, "loss": 0.764, "step": 22540 }, { "epoch": 0.7961887773799169, "grad_norm": 1.6314380168914795, "learning_rate": 1.050343579740976e-06, "loss": 0.7413, "step": 22541 }, { "epoch": 0.7962240991836248, "grad_norm": 1.7002798318862915, "learning_rate": 1.0499928517767683e-06, "loss": 0.7424, "step": 22542 }, { "epoch": 0.7962594209873327, "grad_norm": 1.9654606580734253, "learning_rate": 1.0496421755095133e-06, "loss": 0.7961, "step": 22543 }, { "epoch": 0.7962947427910406, "grad_norm": 1.7810847759246826, "learning_rate": 1.0492915509437968e-06, "loss": 0.7569, "step": 22544 }, { "epoch": 0.7963300645947485, "grad_norm": 0.8861542344093323, "learning_rate": 1.0489409780842103e-06, "loss": 0.582, "step": 22545 }, { "epoch": 0.7963653863984564, "grad_norm": 1.786565899848938, "learning_rate": 1.0485904569353428e-06, "loss": 0.7649, "step": 22546 }, { "epoch": 0.7964007082021644, "grad_norm": 2.075828790664673, "learning_rate": 1.0482399875017796e-06, "loss": 0.7411, "step": 22547 }, { "epoch": 0.7964360300058723, "grad_norm": 1.6446970701217651, "learning_rate": 1.0478895697881082e-06, "loss": 0.7457, "step": 22548 }, { "epoch": 0.7964713518095802, "grad_norm": 1.8285731077194214, "learning_rate": 1.0475392037989163e-06, "loss": 0.7647, "step": 22549 }, { "epoch": 0.7965066736132881, "grad_norm": 1.589301586151123, "learning_rate": 1.0471888895387872e-06, "loss": 0.7573, "step": 22550 }, { "epoch": 0.796541995416996, "grad_norm": 1.8092105388641357, "learning_rate": 1.046838627012307e-06, "loss": 0.7551, "step": 22551 }, { "epoch": 0.7965773172207039, "grad_norm": 1.688029170036316, "learning_rate": 1.0464884162240614e-06, "loss": 0.767, "step": 22552 }, { "epoch": 0.7966126390244118, "grad_norm": 1.6973956823349, "learning_rate": 1.0461382571786306e-06, "loss": 0.7756, "step": 22553 }, { "epoch": 0.7966479608281197, "grad_norm": 1.708099603652954, "learning_rate": 1.0457881498805989e-06, "loss": 0.7383, "step": 22554 }, { "epoch": 0.7966832826318276, "grad_norm": 1.5955253839492798, "learning_rate": 1.04543809433455e-06, "loss": 0.7158, "step": 22555 }, { "epoch": 0.7967186044355355, "grad_norm": 2.244851589202881, "learning_rate": 1.0450880905450623e-06, "loss": 0.7532, "step": 22556 }, { "epoch": 0.7967539262392435, "grad_norm": 1.715477705001831, "learning_rate": 1.0447381385167189e-06, "loss": 0.7669, "step": 22557 }, { "epoch": 0.7967892480429513, "grad_norm": 1.665916919708252, "learning_rate": 1.0443882382540987e-06, "loss": 0.76, "step": 22558 }, { "epoch": 0.7968245698466592, "grad_norm": 1.861617088317871, "learning_rate": 1.0440383897617845e-06, "loss": 0.766, "step": 22559 }, { "epoch": 0.7968598916503671, "grad_norm": 2.771240234375, "learning_rate": 1.043688593044349e-06, "loss": 0.7535, "step": 22560 }, { "epoch": 0.796895213454075, "grad_norm": 1.6390349864959717, "learning_rate": 1.0433388481063743e-06, "loss": 0.739, "step": 22561 }, { "epoch": 0.7969305352577829, "grad_norm": 1.626137137413025, "learning_rate": 1.042989154952438e-06, "loss": 0.7579, "step": 22562 }, { "epoch": 0.7969658570614908, "grad_norm": 2.043800115585327, "learning_rate": 1.0426395135871148e-06, "loss": 0.7752, "step": 22563 }, { "epoch": 0.7970011788651987, "grad_norm": 2.9558334350585938, "learning_rate": 1.0422899240149815e-06, "loss": 0.7679, "step": 22564 }, { "epoch": 0.7970365006689066, "grad_norm": 2.573289632797241, "learning_rate": 1.0419403862406146e-06, "loss": 0.7652, "step": 22565 }, { "epoch": 0.7970718224726145, "grad_norm": 1.6097601652145386, "learning_rate": 1.041590900268587e-06, "loss": 0.7674, "step": 22566 }, { "epoch": 0.7971071442763225, "grad_norm": 2.4789021015167236, "learning_rate": 1.0412414661034737e-06, "loss": 0.778, "step": 22567 }, { "epoch": 0.7971424660800304, "grad_norm": 1.8363900184631348, "learning_rate": 1.0408920837498476e-06, "loss": 0.7417, "step": 22568 }, { "epoch": 0.7971777878837383, "grad_norm": 1.7395256757736206, "learning_rate": 1.0405427532122836e-06, "loss": 0.7642, "step": 22569 }, { "epoch": 0.7972131096874462, "grad_norm": 1.8307619094848633, "learning_rate": 1.0401934744953496e-06, "loss": 0.7621, "step": 22570 }, { "epoch": 0.7972484314911541, "grad_norm": 1.8370355367660522, "learning_rate": 1.0398442476036197e-06, "loss": 0.7824, "step": 22571 }, { "epoch": 0.797283753294862, "grad_norm": 1.5785514116287231, "learning_rate": 1.0394950725416647e-06, "loss": 0.7412, "step": 22572 }, { "epoch": 0.7973190750985699, "grad_norm": 1.596245527267456, "learning_rate": 1.0391459493140526e-06, "loss": 0.7725, "step": 22573 }, { "epoch": 0.7973543969022778, "grad_norm": 1.5852837562561035, "learning_rate": 1.0387968779253538e-06, "loss": 0.7557, "step": 22574 }, { "epoch": 0.7973897187059857, "grad_norm": 1.7353688478469849, "learning_rate": 1.0384478583801383e-06, "loss": 0.7948, "step": 22575 }, { "epoch": 0.7974250405096937, "grad_norm": 2.085312843322754, "learning_rate": 1.0380988906829708e-06, "loss": 0.7605, "step": 22576 }, { "epoch": 0.7974603623134016, "grad_norm": 1.8571019172668457, "learning_rate": 1.0377499748384223e-06, "loss": 0.7691, "step": 22577 }, { "epoch": 0.7974956841171095, "grad_norm": 1.7466436624526978, "learning_rate": 1.037401110851055e-06, "loss": 0.7604, "step": 22578 }, { "epoch": 0.7975310059208174, "grad_norm": 1.9101368188858032, "learning_rate": 1.0370522987254394e-06, "loss": 0.7551, "step": 22579 }, { "epoch": 0.7975663277245253, "grad_norm": 1.8515838384628296, "learning_rate": 1.036703538466136e-06, "loss": 0.746, "step": 22580 }, { "epoch": 0.7976016495282332, "grad_norm": 1.6628776788711548, "learning_rate": 1.0363548300777127e-06, "loss": 0.7839, "step": 22581 }, { "epoch": 0.7976369713319411, "grad_norm": 1.7537199258804321, "learning_rate": 1.0360061735647331e-06, "loss": 0.7744, "step": 22582 }, { "epoch": 0.797672293135649, "grad_norm": 1.9475997686386108, "learning_rate": 1.0356575689317584e-06, "loss": 0.7575, "step": 22583 }, { "epoch": 0.7977076149393568, "grad_norm": 1.7626076936721802, "learning_rate": 1.0353090161833519e-06, "loss": 0.7598, "step": 22584 }, { "epoch": 0.7977429367430647, "grad_norm": 0.908186137676239, "learning_rate": 1.0349605153240772e-06, "loss": 0.5762, "step": 22585 }, { "epoch": 0.7977782585467726, "grad_norm": 1.7732397317886353, "learning_rate": 1.0346120663584925e-06, "loss": 0.7971, "step": 22586 }, { "epoch": 0.7978135803504806, "grad_norm": 1.9245481491088867, "learning_rate": 1.03426366929116e-06, "loss": 0.8125, "step": 22587 }, { "epoch": 0.7978489021541885, "grad_norm": 1.7309006452560425, "learning_rate": 1.0339153241266404e-06, "loss": 0.8154, "step": 22588 }, { "epoch": 0.7978842239578964, "grad_norm": 2.075061559677124, "learning_rate": 1.0335670308694895e-06, "loss": 0.7977, "step": 22589 }, { "epoch": 0.7979195457616043, "grad_norm": 1.7885241508483887, "learning_rate": 1.0332187895242685e-06, "loss": 0.7957, "step": 22590 }, { "epoch": 0.7979548675653122, "grad_norm": 1.7116613388061523, "learning_rate": 1.0328706000955351e-06, "loss": 0.7376, "step": 22591 }, { "epoch": 0.7979901893690201, "grad_norm": 1.6607561111450195, "learning_rate": 1.032522462587845e-06, "loss": 0.7509, "step": 22592 }, { "epoch": 0.798025511172728, "grad_norm": 1.0349974632263184, "learning_rate": 1.032174377005754e-06, "loss": 0.5886, "step": 22593 }, { "epoch": 0.7980608329764359, "grad_norm": 1.625937581062317, "learning_rate": 1.031826343353821e-06, "loss": 0.7661, "step": 22594 }, { "epoch": 0.7980961547801438, "grad_norm": 1.6026958227157593, "learning_rate": 1.0314783616365975e-06, "loss": 0.7466, "step": 22595 }, { "epoch": 0.7981314765838518, "grad_norm": 1.6471976041793823, "learning_rate": 1.0311304318586402e-06, "loss": 0.7874, "step": 22596 }, { "epoch": 0.7981667983875597, "grad_norm": 1.8750165700912476, "learning_rate": 1.0307825540245008e-06, "loss": 0.7268, "step": 22597 }, { "epoch": 0.7982021201912676, "grad_norm": 1.7339458465576172, "learning_rate": 1.030434728138734e-06, "loss": 0.7789, "step": 22598 }, { "epoch": 0.7982374419949755, "grad_norm": 1.6877946853637695, "learning_rate": 1.0300869542058906e-06, "loss": 0.7638, "step": 22599 }, { "epoch": 0.7982727637986834, "grad_norm": 1.6338921785354614, "learning_rate": 1.0297392322305222e-06, "loss": 0.7656, "step": 22600 }, { "epoch": 0.7983080856023913, "grad_norm": 1.6512970924377441, "learning_rate": 1.0293915622171824e-06, "loss": 0.7612, "step": 22601 }, { "epoch": 0.7983434074060992, "grad_norm": 1.840063214302063, "learning_rate": 1.0290439441704175e-06, "loss": 0.7629, "step": 22602 }, { "epoch": 0.7983787292098071, "grad_norm": 1.7015998363494873, "learning_rate": 1.0286963780947796e-06, "loss": 0.7693, "step": 22603 }, { "epoch": 0.798414051013515, "grad_norm": 1.6910163164138794, "learning_rate": 1.028348863994818e-06, "loss": 0.7544, "step": 22604 }, { "epoch": 0.798449372817223, "grad_norm": 1.8851321935653687, "learning_rate": 1.0280014018750783e-06, "loss": 0.7579, "step": 22605 }, { "epoch": 0.7984846946209309, "grad_norm": 11.39772891998291, "learning_rate": 1.02765399174011e-06, "loss": 0.7519, "step": 22606 }, { "epoch": 0.7985200164246388, "grad_norm": 1.6618306636810303, "learning_rate": 1.0273066335944603e-06, "loss": 0.7622, "step": 22607 }, { "epoch": 0.7985553382283467, "grad_norm": 2.0653276443481445, "learning_rate": 1.0269593274426743e-06, "loss": 0.7515, "step": 22608 }, { "epoch": 0.7985906600320546, "grad_norm": 1.7906008958816528, "learning_rate": 1.026612073289297e-06, "loss": 0.7526, "step": 22609 }, { "epoch": 0.7986259818357624, "grad_norm": 2.3525357246398926, "learning_rate": 1.026264871138875e-06, "loss": 0.7935, "step": 22610 }, { "epoch": 0.7986613036394703, "grad_norm": 1.9646841287612915, "learning_rate": 1.025917720995951e-06, "loss": 0.7715, "step": 22611 }, { "epoch": 0.7986966254431782, "grad_norm": 1.5827200412750244, "learning_rate": 1.025570622865068e-06, "loss": 0.7238, "step": 22612 }, { "epoch": 0.7987319472468861, "grad_norm": 1.5716661214828491, "learning_rate": 1.0252235767507712e-06, "loss": 0.757, "step": 22613 }, { "epoch": 0.798767269050594, "grad_norm": 1.6605421304702759, "learning_rate": 1.0248765826576013e-06, "loss": 0.7943, "step": 22614 }, { "epoch": 0.7988025908543019, "grad_norm": 1.7808701992034912, "learning_rate": 1.0245296405900978e-06, "loss": 0.8032, "step": 22615 }, { "epoch": 0.7988379126580099, "grad_norm": 1.6973044872283936, "learning_rate": 1.0241827505528035e-06, "loss": 0.7614, "step": 22616 }, { "epoch": 0.7988732344617178, "grad_norm": 1.6035981178283691, "learning_rate": 1.023835912550259e-06, "loss": 0.7334, "step": 22617 }, { "epoch": 0.7989085562654257, "grad_norm": 0.885615885257721, "learning_rate": 1.0234891265870012e-06, "loss": 0.5727, "step": 22618 }, { "epoch": 0.7989438780691336, "grad_norm": 2.170182704925537, "learning_rate": 1.0231423926675698e-06, "loss": 0.7353, "step": 22619 }, { "epoch": 0.7989791998728415, "grad_norm": 1.6979366540908813, "learning_rate": 1.0227957107965054e-06, "loss": 0.782, "step": 22620 }, { "epoch": 0.7990145216765494, "grad_norm": 1.6722753047943115, "learning_rate": 1.0224490809783412e-06, "loss": 0.7704, "step": 22621 }, { "epoch": 0.7990498434802573, "grad_norm": 1.7030669450759888, "learning_rate": 1.0221025032176163e-06, "loss": 0.7847, "step": 22622 }, { "epoch": 0.7990851652839652, "grad_norm": 1.7258294820785522, "learning_rate": 1.0217559775188667e-06, "loss": 0.7644, "step": 22623 }, { "epoch": 0.7991204870876731, "grad_norm": 1.66598379611969, "learning_rate": 1.0214095038866262e-06, "loss": 0.8222, "step": 22624 }, { "epoch": 0.799155808891381, "grad_norm": 1.6550681591033936, "learning_rate": 1.0210630823254298e-06, "loss": 0.7469, "step": 22625 }, { "epoch": 0.799191130695089, "grad_norm": 1.5755256414413452, "learning_rate": 1.020716712839812e-06, "loss": 0.7409, "step": 22626 }, { "epoch": 0.7992264524987969, "grad_norm": 1.5338820219039917, "learning_rate": 1.0203703954343075e-06, "loss": 0.7398, "step": 22627 }, { "epoch": 0.7992617743025048, "grad_norm": 1.6622697114944458, "learning_rate": 1.0200241301134455e-06, "loss": 0.7616, "step": 22628 }, { "epoch": 0.7992970961062127, "grad_norm": 1.8412028551101685, "learning_rate": 1.0196779168817595e-06, "loss": 0.7784, "step": 22629 }, { "epoch": 0.7993324179099206, "grad_norm": 1.7218064069747925, "learning_rate": 1.0193317557437826e-06, "loss": 0.7901, "step": 22630 }, { "epoch": 0.7993677397136285, "grad_norm": 1.691389799118042, "learning_rate": 1.0189856467040416e-06, "loss": 0.7593, "step": 22631 }, { "epoch": 0.7994030615173364, "grad_norm": 1.9453208446502686, "learning_rate": 1.0186395897670682e-06, "loss": 0.7585, "step": 22632 }, { "epoch": 0.7994383833210443, "grad_norm": 1.7156819105148315, "learning_rate": 1.0182935849373937e-06, "loss": 0.7523, "step": 22633 }, { "epoch": 0.7994737051247522, "grad_norm": 1.7550320625305176, "learning_rate": 1.0179476322195419e-06, "loss": 0.7528, "step": 22634 }, { "epoch": 0.7995090269284602, "grad_norm": 1.7335504293441772, "learning_rate": 1.0176017316180431e-06, "loss": 0.7701, "step": 22635 }, { "epoch": 0.799544348732168, "grad_norm": 1.876952052116394, "learning_rate": 1.017255883137424e-06, "loss": 0.7555, "step": 22636 }, { "epoch": 0.7995796705358759, "grad_norm": 1.5940172672271729, "learning_rate": 1.016910086782213e-06, "loss": 0.7175, "step": 22637 }, { "epoch": 0.7996149923395838, "grad_norm": 1.6876559257507324, "learning_rate": 1.0165643425569322e-06, "loss": 0.7354, "step": 22638 }, { "epoch": 0.7996503141432917, "grad_norm": 2.03368878364563, "learning_rate": 1.0162186504661087e-06, "loss": 0.7644, "step": 22639 }, { "epoch": 0.7996856359469996, "grad_norm": 2.3576672077178955, "learning_rate": 1.0158730105142678e-06, "loss": 0.7404, "step": 22640 }, { "epoch": 0.7997209577507075, "grad_norm": 1.7141528129577637, "learning_rate": 1.0155274227059308e-06, "loss": 0.7551, "step": 22641 }, { "epoch": 0.7997562795544154, "grad_norm": 1.7814865112304688, "learning_rate": 1.0151818870456221e-06, "loss": 0.7717, "step": 22642 }, { "epoch": 0.7997916013581233, "grad_norm": 1.7490630149841309, "learning_rate": 1.0148364035378649e-06, "loss": 0.7822, "step": 22643 }, { "epoch": 0.7998269231618312, "grad_norm": 1.8550091981887817, "learning_rate": 1.0144909721871782e-06, "loss": 0.7402, "step": 22644 }, { "epoch": 0.7998622449655391, "grad_norm": 1.7426954507827759, "learning_rate": 1.0141455929980847e-06, "loss": 0.7868, "step": 22645 }, { "epoch": 0.7998975667692471, "grad_norm": 1.8949291706085205, "learning_rate": 1.0138002659751062e-06, "loss": 0.7734, "step": 22646 }, { "epoch": 0.799932888572955, "grad_norm": 2.291072130203247, "learning_rate": 1.0134549911227586e-06, "loss": 0.7688, "step": 22647 }, { "epoch": 0.7999682103766629, "grad_norm": 1.6887964010238647, "learning_rate": 1.0131097684455626e-06, "loss": 0.7741, "step": 22648 }, { "epoch": 0.8000035321803708, "grad_norm": 1.6701745986938477, "learning_rate": 1.0127645979480383e-06, "loss": 0.7521, "step": 22649 }, { "epoch": 0.8000388539840787, "grad_norm": 2.007474660873413, "learning_rate": 1.0124194796346999e-06, "loss": 0.7952, "step": 22650 }, { "epoch": 0.8000741757877866, "grad_norm": 1.9049625396728516, "learning_rate": 1.0120744135100668e-06, "loss": 0.8126, "step": 22651 }, { "epoch": 0.8001094975914945, "grad_norm": 2.2313437461853027, "learning_rate": 1.0117293995786531e-06, "loss": 0.7771, "step": 22652 }, { "epoch": 0.8001448193952024, "grad_norm": 1.6675301790237427, "learning_rate": 1.0113844378449767e-06, "loss": 0.7832, "step": 22653 }, { "epoch": 0.8001801411989103, "grad_norm": 1.590943455696106, "learning_rate": 1.0110395283135493e-06, "loss": 0.7854, "step": 22654 }, { "epoch": 0.8002154630026183, "grad_norm": 1.9215760231018066, "learning_rate": 1.010694670988887e-06, "loss": 0.7727, "step": 22655 }, { "epoch": 0.8002507848063262, "grad_norm": 1.7202402353286743, "learning_rate": 1.010349865875504e-06, "loss": 0.7735, "step": 22656 }, { "epoch": 0.8002861066100341, "grad_norm": 1.7607717514038086, "learning_rate": 1.010005112977911e-06, "loss": 0.7668, "step": 22657 }, { "epoch": 0.800321428413742, "grad_norm": 1.6668376922607422, "learning_rate": 1.009660412300621e-06, "loss": 0.7678, "step": 22658 }, { "epoch": 0.8003567502174499, "grad_norm": 1.7781661748886108, "learning_rate": 1.0093157638481465e-06, "loss": 0.7404, "step": 22659 }, { "epoch": 0.8003920720211578, "grad_norm": 1.8043484687805176, "learning_rate": 1.0089711676249959e-06, "loss": 0.7758, "step": 22660 }, { "epoch": 0.8004273938248657, "grad_norm": 1.6896108388900757, "learning_rate": 1.0086266236356807e-06, "loss": 0.7865, "step": 22661 }, { "epoch": 0.8004627156285735, "grad_norm": 1.6933969259262085, "learning_rate": 1.008282131884711e-06, "loss": 0.7579, "step": 22662 }, { "epoch": 0.8004980374322814, "grad_norm": 1.698812484741211, "learning_rate": 1.0079376923765926e-06, "loss": 0.7731, "step": 22663 }, { "epoch": 0.8005333592359893, "grad_norm": 2.0422184467315674, "learning_rate": 1.007593305115836e-06, "loss": 0.763, "step": 22664 }, { "epoch": 0.8005686810396972, "grad_norm": 1.5481526851654053, "learning_rate": 1.0072489701069488e-06, "loss": 0.7397, "step": 22665 }, { "epoch": 0.8006040028434052, "grad_norm": 1.571273684501648, "learning_rate": 1.0069046873544352e-06, "loss": 0.7926, "step": 22666 }, { "epoch": 0.8006393246471131, "grad_norm": 1.6948450803756714, "learning_rate": 1.0065604568628023e-06, "loss": 0.7696, "step": 22667 }, { "epoch": 0.800674646450821, "grad_norm": 1.9778926372528076, "learning_rate": 1.006216278636557e-06, "loss": 0.7755, "step": 22668 }, { "epoch": 0.8007099682545289, "grad_norm": 1.7350420951843262, "learning_rate": 1.0058721526802013e-06, "loss": 0.7664, "step": 22669 }, { "epoch": 0.8007452900582368, "grad_norm": 1.9108827114105225, "learning_rate": 1.0055280789982408e-06, "loss": 0.7434, "step": 22670 }, { "epoch": 0.8007806118619447, "grad_norm": 1.6191316843032837, "learning_rate": 1.0051840575951766e-06, "loss": 0.7619, "step": 22671 }, { "epoch": 0.8008159336656526, "grad_norm": 1.8155819177627563, "learning_rate": 1.0048400884755144e-06, "loss": 0.7365, "step": 22672 }, { "epoch": 0.8008512554693605, "grad_norm": 5.597884178161621, "learning_rate": 1.0044961716437523e-06, "loss": 0.7758, "step": 22673 }, { "epoch": 0.8008865772730684, "grad_norm": 1.8558655977249146, "learning_rate": 1.004152307104393e-06, "loss": 0.7602, "step": 22674 }, { "epoch": 0.8009218990767764, "grad_norm": 1.6450657844543457, "learning_rate": 1.0038084948619392e-06, "loss": 0.7542, "step": 22675 }, { "epoch": 0.8009572208804843, "grad_norm": 1.9117333889007568, "learning_rate": 1.0034647349208866e-06, "loss": 0.7871, "step": 22676 }, { "epoch": 0.8009925426841922, "grad_norm": 1.644038200378418, "learning_rate": 1.0031210272857367e-06, "loss": 0.7839, "step": 22677 }, { "epoch": 0.8010278644879001, "grad_norm": 0.9870028495788574, "learning_rate": 1.002777371960989e-06, "loss": 0.6136, "step": 22678 }, { "epoch": 0.801063186291608, "grad_norm": 1.7652292251586914, "learning_rate": 1.0024337689511376e-06, "loss": 0.7807, "step": 22679 }, { "epoch": 0.8010985080953159, "grad_norm": 1.827106237411499, "learning_rate": 1.0020902182606822e-06, "loss": 0.7636, "step": 22680 }, { "epoch": 0.8011338298990238, "grad_norm": 1.598618745803833, "learning_rate": 1.0017467198941193e-06, "loss": 0.7554, "step": 22681 }, { "epoch": 0.8011691517027317, "grad_norm": 1.700270175933838, "learning_rate": 1.001403273855943e-06, "loss": 0.7621, "step": 22682 }, { "epoch": 0.8012044735064396, "grad_norm": 1.9576117992401123, "learning_rate": 1.0010598801506488e-06, "loss": 0.7518, "step": 22683 }, { "epoch": 0.8012397953101476, "grad_norm": 2.0753538608551025, "learning_rate": 1.0007165387827311e-06, "loss": 0.7712, "step": 22684 }, { "epoch": 0.8012751171138555, "grad_norm": 1.6245802640914917, "learning_rate": 1.0003732497566849e-06, "loss": 0.7386, "step": 22685 }, { "epoch": 0.8013104389175634, "grad_norm": 1.9137917757034302, "learning_rate": 1.0000300130770003e-06, "loss": 0.7695, "step": 22686 }, { "epoch": 0.8013457607212713, "grad_norm": 1.634851336479187, "learning_rate": 9.996868287481715e-07, "loss": 0.7774, "step": 22687 }, { "epoch": 0.8013810825249791, "grad_norm": 1.7591829299926758, "learning_rate": 9.993436967746917e-07, "loss": 0.7567, "step": 22688 }, { "epoch": 0.801416404328687, "grad_norm": 1.611297607421875, "learning_rate": 9.990006171610467e-07, "loss": 0.7583, "step": 22689 }, { "epoch": 0.8014517261323949, "grad_norm": 1.8927843570709229, "learning_rate": 9.9865758991173e-07, "loss": 0.7541, "step": 22690 }, { "epoch": 0.8014870479361028, "grad_norm": 2.057603597640991, "learning_rate": 9.983146150312318e-07, "loss": 0.7757, "step": 22691 }, { "epoch": 0.8015223697398107, "grad_norm": 2.9445314407348633, "learning_rate": 9.97971692524038e-07, "loss": 0.7601, "step": 22692 }, { "epoch": 0.8015576915435186, "grad_norm": 1.8001784086227417, "learning_rate": 9.976288223946385e-07, "loss": 0.7598, "step": 22693 }, { "epoch": 0.8015930133472265, "grad_norm": 1.818588376045227, "learning_rate": 9.972860046475207e-07, "loss": 0.7584, "step": 22694 }, { "epoch": 0.8016283351509345, "grad_norm": 1.8641269207000732, "learning_rate": 9.969432392871732e-07, "loss": 0.7898, "step": 22695 }, { "epoch": 0.8016636569546424, "grad_norm": 1.585821509361267, "learning_rate": 9.966005263180777e-07, "loss": 0.7579, "step": 22696 }, { "epoch": 0.8016989787583503, "grad_norm": 1.7935439348220825, "learning_rate": 9.962578657447224e-07, "loss": 0.7772, "step": 22697 }, { "epoch": 0.8017343005620582, "grad_norm": 1.6708139181137085, "learning_rate": 9.959152575715931e-07, "loss": 0.7777, "step": 22698 }, { "epoch": 0.8017696223657661, "grad_norm": 1.6360046863555908, "learning_rate": 9.955727018031708e-07, "loss": 0.7483, "step": 22699 }, { "epoch": 0.801804944169474, "grad_norm": 1.6218656301498413, "learning_rate": 9.9523019844394e-07, "loss": 0.78, "step": 22700 }, { "epoch": 0.8018402659731819, "grad_norm": 1.6753848791122437, "learning_rate": 9.948877474983854e-07, "loss": 0.7868, "step": 22701 }, { "epoch": 0.8018755877768898, "grad_norm": 1.7741285562515259, "learning_rate": 9.94545348970985e-07, "loss": 0.7762, "step": 22702 }, { "epoch": 0.8019109095805977, "grad_norm": 1.7464772462844849, "learning_rate": 9.942030028662225e-07, "loss": 0.79, "step": 22703 }, { "epoch": 0.8019462313843057, "grad_norm": 1.8559235334396362, "learning_rate": 9.938607091885794e-07, "loss": 0.7884, "step": 22704 }, { "epoch": 0.8019815531880136, "grad_norm": 1.0014278888702393, "learning_rate": 9.93518467942533e-07, "loss": 0.5859, "step": 22705 }, { "epoch": 0.8020168749917215, "grad_norm": 1.5989631414413452, "learning_rate": 9.93176279132564e-07, "loss": 0.7393, "step": 22706 }, { "epoch": 0.8020521967954294, "grad_norm": 1.781533122062683, "learning_rate": 9.928341427631515e-07, "loss": 0.7891, "step": 22707 }, { "epoch": 0.8020875185991373, "grad_norm": 1.7222113609313965, "learning_rate": 9.924920588387727e-07, "loss": 0.7527, "step": 22708 }, { "epoch": 0.8021228404028452, "grad_norm": 1.6899349689483643, "learning_rate": 9.921500273639028e-07, "loss": 0.7474, "step": 22709 }, { "epoch": 0.8021581622065531, "grad_norm": 1.6920875310897827, "learning_rate": 9.918080483430203e-07, "loss": 0.7527, "step": 22710 }, { "epoch": 0.802193484010261, "grad_norm": 1.9261577129364014, "learning_rate": 9.91466121780602e-07, "loss": 0.7818, "step": 22711 }, { "epoch": 0.8022288058139689, "grad_norm": 1.69375479221344, "learning_rate": 9.9112424768112e-07, "loss": 0.7704, "step": 22712 }, { "epoch": 0.8022641276176768, "grad_norm": 3.211015224456787, "learning_rate": 9.907824260490507e-07, "loss": 0.7939, "step": 22713 }, { "epoch": 0.8022994494213848, "grad_norm": 1.6607334613800049, "learning_rate": 9.904406568888685e-07, "loss": 0.7383, "step": 22714 }, { "epoch": 0.8023347712250926, "grad_norm": 4.0416717529296875, "learning_rate": 9.900989402050436e-07, "loss": 0.7574, "step": 22715 }, { "epoch": 0.8023700930288005, "grad_norm": 1.998307704925537, "learning_rate": 9.897572760020507e-07, "loss": 0.7699, "step": 22716 }, { "epoch": 0.8024054148325084, "grad_norm": 2.136451482772827, "learning_rate": 9.894156642843617e-07, "loss": 0.758, "step": 22717 }, { "epoch": 0.8024407366362163, "grad_norm": 1.647055983543396, "learning_rate": 9.890741050564452e-07, "loss": 0.7672, "step": 22718 }, { "epoch": 0.8024760584399242, "grad_norm": 1.6015374660491943, "learning_rate": 9.887325983227725e-07, "loss": 0.7845, "step": 22719 }, { "epoch": 0.8025113802436321, "grad_norm": 1.7826462984085083, "learning_rate": 9.883911440878153e-07, "loss": 0.7409, "step": 22720 }, { "epoch": 0.80254670204734, "grad_norm": 1.8296012878417969, "learning_rate": 9.880497423560392e-07, "loss": 0.7879, "step": 22721 }, { "epoch": 0.8025820238510479, "grad_norm": 1.7127938270568848, "learning_rate": 9.877083931319137e-07, "loss": 0.7499, "step": 22722 }, { "epoch": 0.8026173456547558, "grad_norm": 1.6222617626190186, "learning_rate": 9.873670964199083e-07, "loss": 0.8043, "step": 22723 }, { "epoch": 0.8026526674584638, "grad_norm": 1.8567297458648682, "learning_rate": 9.870258522244864e-07, "loss": 0.7983, "step": 22724 }, { "epoch": 0.8026879892621717, "grad_norm": 1.602769374847412, "learning_rate": 9.866846605501173e-07, "loss": 0.7385, "step": 22725 }, { "epoch": 0.8027233110658796, "grad_norm": 1.6743453741073608, "learning_rate": 9.863435214012634e-07, "loss": 0.7754, "step": 22726 }, { "epoch": 0.8027586328695875, "grad_norm": 1.576730489730835, "learning_rate": 9.860024347823926e-07, "loss": 0.744, "step": 22727 }, { "epoch": 0.8027939546732954, "grad_norm": 1.8588101863861084, "learning_rate": 9.856614006979658e-07, "loss": 0.7698, "step": 22728 }, { "epoch": 0.8028292764770033, "grad_norm": 2.098672389984131, "learning_rate": 9.85320419152448e-07, "loss": 0.8218, "step": 22729 }, { "epoch": 0.8028645982807112, "grad_norm": 1.6166417598724365, "learning_rate": 9.849794901503035e-07, "loss": 0.7277, "step": 22730 }, { "epoch": 0.8028999200844191, "grad_norm": 1.797186017036438, "learning_rate": 9.846386136959911e-07, "loss": 0.7585, "step": 22731 }, { "epoch": 0.802935241888127, "grad_norm": 1.6581635475158691, "learning_rate": 9.842977897939737e-07, "loss": 0.7746, "step": 22732 }, { "epoch": 0.802970563691835, "grad_norm": 1.727129340171814, "learning_rate": 9.839570184487135e-07, "loss": 0.7773, "step": 22733 }, { "epoch": 0.8030058854955429, "grad_norm": 1.6932603120803833, "learning_rate": 9.836162996646675e-07, "loss": 0.767, "step": 22734 }, { "epoch": 0.8030412072992508, "grad_norm": 1.7857015132904053, "learning_rate": 9.832756334462961e-07, "loss": 0.7315, "step": 22735 }, { "epoch": 0.8030765291029587, "grad_norm": 1.8186181783676147, "learning_rate": 9.829350197980596e-07, "loss": 0.7618, "step": 22736 }, { "epoch": 0.8031118509066666, "grad_norm": 1.685050368309021, "learning_rate": 9.825944587244134e-07, "loss": 0.7842, "step": 22737 }, { "epoch": 0.8031471727103745, "grad_norm": 1.7603577375411987, "learning_rate": 9.822539502298156e-07, "loss": 0.7385, "step": 22738 }, { "epoch": 0.8031824945140824, "grad_norm": 1.5733190774917603, "learning_rate": 9.819134943187242e-07, "loss": 0.7545, "step": 22739 }, { "epoch": 0.8032178163177903, "grad_norm": 1.5416837930679321, "learning_rate": 9.815730909955922e-07, "loss": 0.7634, "step": 22740 }, { "epoch": 0.8032531381214981, "grad_norm": 1.7699058055877686, "learning_rate": 9.812327402648763e-07, "loss": 0.756, "step": 22741 }, { "epoch": 0.803288459925206, "grad_norm": 1.8143678903579712, "learning_rate": 9.808924421310306e-07, "loss": 0.7866, "step": 22742 }, { "epoch": 0.8033237817289139, "grad_norm": 1.7029337882995605, "learning_rate": 9.80552196598511e-07, "loss": 0.8048, "step": 22743 }, { "epoch": 0.8033591035326219, "grad_norm": 1.6630762815475464, "learning_rate": 9.802120036717683e-07, "loss": 0.7392, "step": 22744 }, { "epoch": 0.8033944253363298, "grad_norm": 1.978489875793457, "learning_rate": 9.79871863355254e-07, "loss": 0.7498, "step": 22745 }, { "epoch": 0.8034297471400377, "grad_norm": 0.8659074306488037, "learning_rate": 9.795317756534222e-07, "loss": 0.5771, "step": 22746 }, { "epoch": 0.8034650689437456, "grad_norm": 1.967174768447876, "learning_rate": 9.791917405707213e-07, "loss": 0.7769, "step": 22747 }, { "epoch": 0.8035003907474535, "grad_norm": 1.5265148878097534, "learning_rate": 9.788517581116036e-07, "loss": 0.7432, "step": 22748 }, { "epoch": 0.8035357125511614, "grad_norm": 1.8877531290054321, "learning_rate": 9.785118282805188e-07, "loss": 0.7508, "step": 22749 }, { "epoch": 0.8035710343548693, "grad_norm": 1.5871292352676392, "learning_rate": 9.781719510819149e-07, "loss": 0.7745, "step": 22750 }, { "epoch": 0.8036063561585772, "grad_norm": 2.2740588188171387, "learning_rate": 9.778321265202394e-07, "loss": 0.7697, "step": 22751 }, { "epoch": 0.8036416779622851, "grad_norm": 1.6619770526885986, "learning_rate": 9.774923545999422e-07, "loss": 0.7298, "step": 22752 }, { "epoch": 0.803676999765993, "grad_norm": 1.8236788511276245, "learning_rate": 9.77152635325469e-07, "loss": 0.7823, "step": 22753 }, { "epoch": 0.803712321569701, "grad_norm": 1.7407917976379395, "learning_rate": 9.76812968701265e-07, "loss": 0.783, "step": 22754 }, { "epoch": 0.8037476433734089, "grad_norm": 1.8010801076889038, "learning_rate": 9.764733547317767e-07, "loss": 0.7856, "step": 22755 }, { "epoch": 0.8037829651771168, "grad_norm": 1.5666799545288086, "learning_rate": 9.761337934214505e-07, "loss": 0.7428, "step": 22756 }, { "epoch": 0.8038182869808247, "grad_norm": 1.7033233642578125, "learning_rate": 9.75794284774727e-07, "loss": 0.7627, "step": 22757 }, { "epoch": 0.8038536087845326, "grad_norm": 0.9251564741134644, "learning_rate": 9.754548287960514e-07, "loss": 0.5764, "step": 22758 }, { "epoch": 0.8038889305882405, "grad_norm": 1.7203510999679565, "learning_rate": 9.75115425489868e-07, "loss": 0.7983, "step": 22759 }, { "epoch": 0.8039242523919484, "grad_norm": 1.7570946216583252, "learning_rate": 9.747760748606162e-07, "loss": 0.7611, "step": 22760 }, { "epoch": 0.8039595741956563, "grad_norm": 1.6936601400375366, "learning_rate": 9.744367769127377e-07, "loss": 0.7689, "step": 22761 }, { "epoch": 0.8039948959993642, "grad_norm": 1.8710888624191284, "learning_rate": 9.740975316506756e-07, "loss": 0.7564, "step": 22762 }, { "epoch": 0.8040302178030722, "grad_norm": 1.6877126693725586, "learning_rate": 9.737583390788686e-07, "loss": 0.7941, "step": 22763 }, { "epoch": 0.8040655396067801, "grad_norm": 1.6106128692626953, "learning_rate": 9.734191992017539e-07, "loss": 0.74, "step": 22764 }, { "epoch": 0.804100861410488, "grad_norm": 1.6506783962249756, "learning_rate": 9.73080112023772e-07, "loss": 0.7489, "step": 22765 }, { "epoch": 0.8041361832141959, "grad_norm": 2.221332311630249, "learning_rate": 9.727410775493623e-07, "loss": 0.7485, "step": 22766 }, { "epoch": 0.8041715050179037, "grad_norm": 1.7475671768188477, "learning_rate": 9.724020957829584e-07, "loss": 0.7491, "step": 22767 }, { "epoch": 0.8042068268216116, "grad_norm": 1.7409708499908447, "learning_rate": 9.720631667289987e-07, "loss": 0.7786, "step": 22768 }, { "epoch": 0.8042421486253195, "grad_norm": 1.643418550491333, "learning_rate": 9.71724290391921e-07, "loss": 0.7356, "step": 22769 }, { "epoch": 0.8042774704290274, "grad_norm": 2.2162365913391113, "learning_rate": 9.713854667761568e-07, "loss": 0.7694, "step": 22770 }, { "epoch": 0.8043127922327353, "grad_norm": 0.9631298780441284, "learning_rate": 9.710466958861426e-07, "loss": 0.5949, "step": 22771 }, { "epoch": 0.8043481140364432, "grad_norm": 2.458059072494507, "learning_rate": 9.707079777263129e-07, "loss": 0.8082, "step": 22772 }, { "epoch": 0.8043834358401512, "grad_norm": 1.5622386932373047, "learning_rate": 9.703693123010987e-07, "loss": 0.715, "step": 22773 }, { "epoch": 0.8044187576438591, "grad_norm": 1.9773789644241333, "learning_rate": 9.700306996149333e-07, "loss": 0.7492, "step": 22774 }, { "epoch": 0.804454079447567, "grad_norm": 1.8343950510025024, "learning_rate": 9.696921396722498e-07, "loss": 0.7636, "step": 22775 }, { "epoch": 0.8044894012512749, "grad_norm": 1.629019856452942, "learning_rate": 9.69353632477477e-07, "loss": 0.7395, "step": 22776 }, { "epoch": 0.8045247230549828, "grad_norm": 1.5951730012893677, "learning_rate": 9.69015178035046e-07, "loss": 0.7761, "step": 22777 }, { "epoch": 0.8045600448586907, "grad_norm": 1.8927699327468872, "learning_rate": 9.686767763493877e-07, "loss": 0.7906, "step": 22778 }, { "epoch": 0.8045953666623986, "grad_norm": 1.6212184429168701, "learning_rate": 9.683384274249285e-07, "loss": 0.7423, "step": 22779 }, { "epoch": 0.8046306884661065, "grad_norm": 1.6003317832946777, "learning_rate": 9.680001312660986e-07, "loss": 0.7794, "step": 22780 }, { "epoch": 0.8046660102698144, "grad_norm": 1.8046907186508179, "learning_rate": 9.676618878773264e-07, "loss": 0.7488, "step": 22781 }, { "epoch": 0.8047013320735223, "grad_norm": 1.7295209169387817, "learning_rate": 9.673236972630367e-07, "loss": 0.7652, "step": 22782 }, { "epoch": 0.8047366538772303, "grad_norm": 2.1736905574798584, "learning_rate": 9.669855594276555e-07, "loss": 0.7644, "step": 22783 }, { "epoch": 0.8047719756809382, "grad_norm": 1.7786715030670166, "learning_rate": 9.666474743756094e-07, "loss": 0.7891, "step": 22784 }, { "epoch": 0.8048072974846461, "grad_norm": 1.901695728302002, "learning_rate": 9.663094421113244e-07, "loss": 0.7654, "step": 22785 }, { "epoch": 0.804842619288354, "grad_norm": 1.7009004354476929, "learning_rate": 9.659714626392214e-07, "loss": 0.7795, "step": 22786 }, { "epoch": 0.8048779410920619, "grad_norm": 1.889420986175537, "learning_rate": 9.656335359637258e-07, "loss": 0.7942, "step": 22787 }, { "epoch": 0.8049132628957698, "grad_norm": 1.6591942310333252, "learning_rate": 9.652956620892612e-07, "loss": 0.7436, "step": 22788 }, { "epoch": 0.8049485846994777, "grad_norm": 1.6252001523971558, "learning_rate": 9.649578410202476e-07, "loss": 0.7701, "step": 22789 }, { "epoch": 0.8049839065031856, "grad_norm": 1.558343768119812, "learning_rate": 9.646200727611072e-07, "loss": 0.7268, "step": 22790 }, { "epoch": 0.8050192283068935, "grad_norm": 1.5956143140792847, "learning_rate": 9.642823573162612e-07, "loss": 0.7449, "step": 22791 }, { "epoch": 0.8050545501106015, "grad_norm": 1.8450255393981934, "learning_rate": 9.639446946901287e-07, "loss": 0.7899, "step": 22792 }, { "epoch": 0.8050898719143093, "grad_norm": 1.8193252086639404, "learning_rate": 9.636070848871292e-07, "loss": 0.7622, "step": 22793 }, { "epoch": 0.8051251937180172, "grad_norm": 1.6758654117584229, "learning_rate": 9.632695279116828e-07, "loss": 0.7853, "step": 22794 }, { "epoch": 0.8051605155217251, "grad_norm": 1.6055290699005127, "learning_rate": 9.629320237682044e-07, "loss": 0.7572, "step": 22795 }, { "epoch": 0.805195837325433, "grad_norm": 1.815934181213379, "learning_rate": 9.62594572461113e-07, "loss": 0.7601, "step": 22796 }, { "epoch": 0.8052311591291409, "grad_norm": 1.5845800638198853, "learning_rate": 9.622571739948256e-07, "loss": 0.7509, "step": 22797 }, { "epoch": 0.8052664809328488, "grad_norm": 1.5807172060012817, "learning_rate": 9.619198283737569e-07, "loss": 0.7815, "step": 22798 }, { "epoch": 0.8053018027365567, "grad_norm": 1.7924336194992065, "learning_rate": 9.61582535602323e-07, "loss": 0.7635, "step": 22799 }, { "epoch": 0.8053371245402646, "grad_norm": 1.7237067222595215, "learning_rate": 9.612452956849366e-07, "loss": 0.778, "step": 22800 }, { "epoch": 0.8053724463439725, "grad_norm": 1.6736016273498535, "learning_rate": 9.60908108626014e-07, "loss": 0.7702, "step": 22801 }, { "epoch": 0.8054077681476804, "grad_norm": 1.731993556022644, "learning_rate": 9.605709744299657e-07, "loss": 0.7433, "step": 22802 }, { "epoch": 0.8054430899513884, "grad_norm": 1.7020922899246216, "learning_rate": 9.602338931012051e-07, "loss": 0.7222, "step": 22803 }, { "epoch": 0.8054784117550963, "grad_norm": 0.933476984500885, "learning_rate": 9.598968646441448e-07, "loss": 0.5903, "step": 22804 }, { "epoch": 0.8055137335588042, "grad_norm": 1.6229133605957031, "learning_rate": 9.595598890631941e-07, "loss": 0.7514, "step": 22805 }, { "epoch": 0.8055490553625121, "grad_norm": 1.8225078582763672, "learning_rate": 9.592229663627634e-07, "loss": 0.7617, "step": 22806 }, { "epoch": 0.80558437716622, "grad_norm": 1.7674980163574219, "learning_rate": 9.588860965472646e-07, "loss": 0.7513, "step": 22807 }, { "epoch": 0.8056196989699279, "grad_norm": 1.73757004737854, "learning_rate": 9.585492796211038e-07, "loss": 0.7824, "step": 22808 }, { "epoch": 0.8056550207736358, "grad_norm": 1.8533095121383667, "learning_rate": 9.582125155886902e-07, "loss": 0.752, "step": 22809 }, { "epoch": 0.8056903425773437, "grad_norm": 1.587506651878357, "learning_rate": 9.578758044544311e-07, "loss": 0.7566, "step": 22810 }, { "epoch": 0.8057256643810516, "grad_norm": 1.6814444065093994, "learning_rate": 9.575391462227356e-07, "loss": 0.7496, "step": 22811 }, { "epoch": 0.8057609861847596, "grad_norm": 1.7009081840515137, "learning_rate": 9.572025408980061e-07, "loss": 0.784, "step": 22812 }, { "epoch": 0.8057963079884675, "grad_norm": 1.7849854230880737, "learning_rate": 9.568659884846498e-07, "loss": 0.7419, "step": 22813 }, { "epoch": 0.8058316297921754, "grad_norm": 1.6972099542617798, "learning_rate": 9.56529488987073e-07, "loss": 0.7625, "step": 22814 }, { "epoch": 0.8058669515958833, "grad_norm": 1.57504403591156, "learning_rate": 9.561930424096766e-07, "loss": 0.742, "step": 22815 }, { "epoch": 0.8059022733995912, "grad_norm": 1.7190052270889282, "learning_rate": 9.558566487568655e-07, "loss": 0.7533, "step": 22816 }, { "epoch": 0.8059375952032991, "grad_norm": 1.494284749031067, "learning_rate": 9.55520308033045e-07, "loss": 0.7648, "step": 22817 }, { "epoch": 0.805972917007007, "grad_norm": 1.8125907182693481, "learning_rate": 9.551840202426117e-07, "loss": 0.7645, "step": 22818 }, { "epoch": 0.8060082388107148, "grad_norm": 1.9318815469741821, "learning_rate": 9.548477853899695e-07, "loss": 0.7433, "step": 22819 }, { "epoch": 0.8060435606144227, "grad_norm": 1.7787351608276367, "learning_rate": 9.545116034795193e-07, "loss": 0.7869, "step": 22820 }, { "epoch": 0.8060788824181306, "grad_norm": 1.571543574333191, "learning_rate": 9.541754745156623e-07, "loss": 0.7596, "step": 22821 }, { "epoch": 0.8061142042218385, "grad_norm": 1.6168930530548096, "learning_rate": 9.538393985027944e-07, "loss": 0.7707, "step": 22822 }, { "epoch": 0.8061495260255465, "grad_norm": 1.9600645303726196, "learning_rate": 9.535033754453161e-07, "loss": 0.7442, "step": 22823 }, { "epoch": 0.8061848478292544, "grad_norm": 1.7035386562347412, "learning_rate": 9.531674053476264e-07, "loss": 0.7469, "step": 22824 }, { "epoch": 0.8062201696329623, "grad_norm": 1.7696588039398193, "learning_rate": 9.528314882141193e-07, "loss": 0.7577, "step": 22825 }, { "epoch": 0.8062554914366702, "grad_norm": 1.820400357246399, "learning_rate": 9.524956240491933e-07, "loss": 0.7895, "step": 22826 }, { "epoch": 0.8062908132403781, "grad_norm": 2.2203333377838135, "learning_rate": 9.521598128572452e-07, "loss": 0.7626, "step": 22827 }, { "epoch": 0.806326135044086, "grad_norm": 1.8584167957305908, "learning_rate": 9.518240546426666e-07, "loss": 0.7557, "step": 22828 }, { "epoch": 0.8063614568477939, "grad_norm": 1.6426825523376465, "learning_rate": 9.514883494098543e-07, "loss": 0.8011, "step": 22829 }, { "epoch": 0.8063967786515018, "grad_norm": 1.849257469177246, "learning_rate": 9.511526971632023e-07, "loss": 0.7519, "step": 22830 }, { "epoch": 0.8064321004552097, "grad_norm": 1.6988548040390015, "learning_rate": 9.508170979071018e-07, "loss": 0.7527, "step": 22831 }, { "epoch": 0.8064674222589177, "grad_norm": 1.7218053340911865, "learning_rate": 9.504815516459459e-07, "loss": 0.7849, "step": 22832 }, { "epoch": 0.8065027440626256, "grad_norm": 1.8640882968902588, "learning_rate": 9.501460583841271e-07, "loss": 0.7784, "step": 22833 }, { "epoch": 0.8065380658663335, "grad_norm": 1.7775681018829346, "learning_rate": 9.498106181260347e-07, "loss": 0.75, "step": 22834 }, { "epoch": 0.8065733876700414, "grad_norm": 1.5873531103134155, "learning_rate": 9.494752308760591e-07, "loss": 0.7891, "step": 22835 }, { "epoch": 0.8066087094737493, "grad_norm": 1.741097092628479, "learning_rate": 9.491398966385917e-07, "loss": 0.7468, "step": 22836 }, { "epoch": 0.8066440312774572, "grad_norm": 2.0807688236236572, "learning_rate": 9.488046154180202e-07, "loss": 0.802, "step": 22837 }, { "epoch": 0.8066793530811651, "grad_norm": 1.6664835214614868, "learning_rate": 9.484693872187306e-07, "loss": 0.7304, "step": 22838 }, { "epoch": 0.806714674884873, "grad_norm": 1.5526292324066162, "learning_rate": 9.481342120451125e-07, "loss": 0.7508, "step": 22839 }, { "epoch": 0.8067499966885809, "grad_norm": 1.6607855558395386, "learning_rate": 9.477990899015532e-07, "loss": 0.7702, "step": 22840 }, { "epoch": 0.8067853184922888, "grad_norm": 1.7910690307617188, "learning_rate": 9.474640207924362e-07, "loss": 0.7563, "step": 22841 }, { "epoch": 0.8068206402959968, "grad_norm": 1.740986704826355, "learning_rate": 9.471290047221487e-07, "loss": 0.7967, "step": 22842 }, { "epoch": 0.8068559620997047, "grad_norm": 1.8982110023498535, "learning_rate": 9.46794041695076e-07, "loss": 0.7857, "step": 22843 }, { "epoch": 0.8068912839034126, "grad_norm": 1.8822810649871826, "learning_rate": 9.464591317155996e-07, "loss": 0.816, "step": 22844 }, { "epoch": 0.8069266057071204, "grad_norm": 1.8207930326461792, "learning_rate": 9.461242747881044e-07, "loss": 0.7909, "step": 22845 }, { "epoch": 0.8069619275108283, "grad_norm": 1.7275822162628174, "learning_rate": 9.457894709169734e-07, "loss": 0.7751, "step": 22846 }, { "epoch": 0.8069972493145362, "grad_norm": 2.11487078666687, "learning_rate": 9.454547201065867e-07, "loss": 0.7625, "step": 22847 }, { "epoch": 0.8070325711182441, "grad_norm": 1.7586956024169922, "learning_rate": 9.451200223613266e-07, "loss": 0.765, "step": 22848 }, { "epoch": 0.807067892921952, "grad_norm": 1.6649163961410522, "learning_rate": 9.447853776855747e-07, "loss": 0.7432, "step": 22849 }, { "epoch": 0.8071032147256599, "grad_norm": 1.5807673931121826, "learning_rate": 9.444507860837083e-07, "loss": 0.7441, "step": 22850 }, { "epoch": 0.8071385365293678, "grad_norm": 2.791095495223999, "learning_rate": 9.441162475601074e-07, "loss": 0.7859, "step": 22851 }, { "epoch": 0.8071738583330758, "grad_norm": 1.8368648290634155, "learning_rate": 9.43781762119152e-07, "loss": 0.7719, "step": 22852 }, { "epoch": 0.8072091801367837, "grad_norm": 1.8424407243728638, "learning_rate": 9.434473297652169e-07, "loss": 0.7492, "step": 22853 }, { "epoch": 0.8072445019404916, "grad_norm": 1.7030282020568848, "learning_rate": 9.431129505026821e-07, "loss": 0.7735, "step": 22854 }, { "epoch": 0.8072798237441995, "grad_norm": 1.5758850574493408, "learning_rate": 9.42778624335921e-07, "loss": 0.7476, "step": 22855 }, { "epoch": 0.8073151455479074, "grad_norm": 1.9852702617645264, "learning_rate": 9.424443512693121e-07, "loss": 0.7732, "step": 22856 }, { "epoch": 0.8073504673516153, "grad_norm": 1.6760790348052979, "learning_rate": 9.421101313072273e-07, "loss": 0.751, "step": 22857 }, { "epoch": 0.8073857891553232, "grad_norm": 1.830816388130188, "learning_rate": 9.417759644540419e-07, "loss": 0.7548, "step": 22858 }, { "epoch": 0.8074211109590311, "grad_norm": 1.7650007009506226, "learning_rate": 9.414418507141321e-07, "loss": 0.7863, "step": 22859 }, { "epoch": 0.807456432762739, "grad_norm": 1.7897409200668335, "learning_rate": 9.411077900918664e-07, "loss": 0.7763, "step": 22860 }, { "epoch": 0.807491754566447, "grad_norm": 1.7561101913452148, "learning_rate": 9.407737825916191e-07, "loss": 0.7905, "step": 22861 }, { "epoch": 0.8075270763701549, "grad_norm": 1.9847345352172852, "learning_rate": 9.404398282177629e-07, "loss": 0.798, "step": 22862 }, { "epoch": 0.8075623981738628, "grad_norm": 2.075904607772827, "learning_rate": 9.401059269746654e-07, "loss": 0.77, "step": 22863 }, { "epoch": 0.8075977199775707, "grad_norm": 2.427358388900757, "learning_rate": 9.397720788666992e-07, "loss": 0.772, "step": 22864 }, { "epoch": 0.8076330417812786, "grad_norm": 1.6887145042419434, "learning_rate": 9.394382838982336e-07, "loss": 0.7938, "step": 22865 }, { "epoch": 0.8076683635849865, "grad_norm": 1.8003671169281006, "learning_rate": 9.39104542073635e-07, "loss": 0.75, "step": 22866 }, { "epoch": 0.8077036853886944, "grad_norm": 1.6106374263763428, "learning_rate": 9.387708533972728e-07, "loss": 0.7635, "step": 22867 }, { "epoch": 0.8077390071924023, "grad_norm": 0.8642724752426147, "learning_rate": 9.384372178735157e-07, "loss": 0.5507, "step": 22868 }, { "epoch": 0.8077743289961102, "grad_norm": 1.7087019681930542, "learning_rate": 9.381036355067274e-07, "loss": 0.7707, "step": 22869 }, { "epoch": 0.8078096507998181, "grad_norm": 1.481572151184082, "learning_rate": 9.37770106301275e-07, "loss": 0.773, "step": 22870 }, { "epoch": 0.807844972603526, "grad_norm": 1.5653572082519531, "learning_rate": 9.374366302615245e-07, "loss": 0.7522, "step": 22871 }, { "epoch": 0.8078802944072339, "grad_norm": 1.518132209777832, "learning_rate": 9.371032073918402e-07, "loss": 0.734, "step": 22872 }, { "epoch": 0.8079156162109418, "grad_norm": 1.8400990962982178, "learning_rate": 9.367698376965862e-07, "loss": 0.8027, "step": 22873 }, { "epoch": 0.8079509380146497, "grad_norm": 1.8408756256103516, "learning_rate": 9.36436521180123e-07, "loss": 0.7773, "step": 22874 }, { "epoch": 0.8079862598183576, "grad_norm": 1.7361011505126953, "learning_rate": 9.36103257846816e-07, "loss": 0.7768, "step": 22875 }, { "epoch": 0.8080215816220655, "grad_norm": 1.696666955947876, "learning_rate": 9.357700477010239e-07, "loss": 0.7973, "step": 22876 }, { "epoch": 0.8080569034257734, "grad_norm": 1.5683763027191162, "learning_rate": 9.354368907471101e-07, "loss": 0.7566, "step": 22877 }, { "epoch": 0.8080922252294813, "grad_norm": 1.9678186178207397, "learning_rate": 9.351037869894353e-07, "loss": 0.8016, "step": 22878 }, { "epoch": 0.8081275470331892, "grad_norm": 1.7592283487319946, "learning_rate": 9.347707364323566e-07, "loss": 0.7357, "step": 22879 }, { "epoch": 0.8081628688368971, "grad_norm": 1.7303270101547241, "learning_rate": 9.344377390802345e-07, "loss": 0.8088, "step": 22880 }, { "epoch": 0.808198190640605, "grad_norm": 1.8659367561340332, "learning_rate": 9.341047949374271e-07, "loss": 0.7652, "step": 22881 }, { "epoch": 0.808233512444313, "grad_norm": 1.6655526161193848, "learning_rate": 9.337719040082932e-07, "loss": 0.75, "step": 22882 }, { "epoch": 0.8082688342480209, "grad_norm": 1.5947738885879517, "learning_rate": 9.334390662971865e-07, "loss": 0.7758, "step": 22883 }, { "epoch": 0.8083041560517288, "grad_norm": 1.8187319040298462, "learning_rate": 9.331062818084652e-07, "loss": 0.7865, "step": 22884 }, { "epoch": 0.8083394778554367, "grad_norm": 1.6777770519256592, "learning_rate": 9.327735505464858e-07, "loss": 0.7813, "step": 22885 }, { "epoch": 0.8083747996591446, "grad_norm": 1.682593584060669, "learning_rate": 9.324408725156004e-07, "loss": 0.7469, "step": 22886 }, { "epoch": 0.8084101214628525, "grad_norm": 1.9641660451889038, "learning_rate": 9.321082477201637e-07, "loss": 0.7625, "step": 22887 }, { "epoch": 0.8084454432665604, "grad_norm": 1.6785564422607422, "learning_rate": 9.317756761645314e-07, "loss": 0.8061, "step": 22888 }, { "epoch": 0.8084807650702683, "grad_norm": 1.8157938718795776, "learning_rate": 9.314431578530525e-07, "loss": 0.7627, "step": 22889 }, { "epoch": 0.8085160868739762, "grad_norm": 1.6435844898223877, "learning_rate": 9.311106927900809e-07, "loss": 0.7826, "step": 22890 }, { "epoch": 0.8085514086776842, "grad_norm": 1.6678410768508911, "learning_rate": 9.307782809799693e-07, "loss": 0.7304, "step": 22891 }, { "epoch": 0.8085867304813921, "grad_norm": 1.7663718461990356, "learning_rate": 9.304459224270668e-07, "loss": 0.7535, "step": 22892 }, { "epoch": 0.8086220522851, "grad_norm": 1.6147654056549072, "learning_rate": 9.301136171357211e-07, "loss": 0.7525, "step": 22893 }, { "epoch": 0.8086573740888079, "grad_norm": 0.9370849132537842, "learning_rate": 9.297813651102838e-07, "loss": 0.5903, "step": 22894 }, { "epoch": 0.8086926958925158, "grad_norm": 1.7187920808792114, "learning_rate": 9.294491663551042e-07, "loss": 0.7709, "step": 22895 }, { "epoch": 0.8087280176962237, "grad_norm": 1.5400890111923218, "learning_rate": 9.29117020874527e-07, "loss": 0.7534, "step": 22896 }, { "epoch": 0.8087633394999315, "grad_norm": 1.6720471382141113, "learning_rate": 9.287849286729011e-07, "loss": 0.7682, "step": 22897 }, { "epoch": 0.8087986613036394, "grad_norm": 1.6632126569747925, "learning_rate": 9.284528897545742e-07, "loss": 0.7877, "step": 22898 }, { "epoch": 0.8088339831073473, "grad_norm": 2.021125078201294, "learning_rate": 9.281209041238892e-07, "loss": 0.7528, "step": 22899 }, { "epoch": 0.8088693049110552, "grad_norm": 1.8476437330245972, "learning_rate": 9.277889717851923e-07, "loss": 0.783, "step": 22900 }, { "epoch": 0.8089046267147632, "grad_norm": 1.881595492362976, "learning_rate": 9.274570927428295e-07, "loss": 0.7914, "step": 22901 }, { "epoch": 0.8089399485184711, "grad_norm": 1.6282023191452026, "learning_rate": 9.27125267001141e-07, "loss": 0.7745, "step": 22902 }, { "epoch": 0.808975270322179, "grad_norm": 1.5972564220428467, "learning_rate": 9.267934945644713e-07, "loss": 0.742, "step": 22903 }, { "epoch": 0.8090105921258869, "grad_norm": 1.839255452156067, "learning_rate": 9.264617754371647e-07, "loss": 0.8129, "step": 22904 }, { "epoch": 0.8090459139295948, "grad_norm": 3.1786839962005615, "learning_rate": 9.261301096235586e-07, "loss": 0.7634, "step": 22905 }, { "epoch": 0.8090812357333027, "grad_norm": 1.702351450920105, "learning_rate": 9.257984971279959e-07, "loss": 0.7557, "step": 22906 }, { "epoch": 0.8091165575370106, "grad_norm": 2.0328824520111084, "learning_rate": 9.254669379548181e-07, "loss": 0.788, "step": 22907 }, { "epoch": 0.8091518793407185, "grad_norm": 1.6887023448944092, "learning_rate": 9.251354321083617e-07, "loss": 0.7808, "step": 22908 }, { "epoch": 0.8091872011444264, "grad_norm": 1.5754576921463013, "learning_rate": 9.24803979592967e-07, "loss": 0.7902, "step": 22909 }, { "epoch": 0.8092225229481343, "grad_norm": 2.417086601257324, "learning_rate": 9.244725804129729e-07, "loss": 0.7749, "step": 22910 }, { "epoch": 0.8092578447518423, "grad_norm": 1.9181504249572754, "learning_rate": 9.241412345727158e-07, "loss": 0.7558, "step": 22911 }, { "epoch": 0.8092931665555502, "grad_norm": 1.667396068572998, "learning_rate": 9.238099420765301e-07, "loss": 0.7455, "step": 22912 }, { "epoch": 0.8093284883592581, "grad_norm": 1.7586796283721924, "learning_rate": 9.23478702928754e-07, "loss": 0.7676, "step": 22913 }, { "epoch": 0.809363810162966, "grad_norm": 1.9605052471160889, "learning_rate": 9.23147517133724e-07, "loss": 0.7491, "step": 22914 }, { "epoch": 0.8093991319666739, "grad_norm": 1.7414830923080444, "learning_rate": 9.228163846957711e-07, "loss": 0.7752, "step": 22915 }, { "epoch": 0.8094344537703818, "grad_norm": 2.1004390716552734, "learning_rate": 9.224853056192312e-07, "loss": 0.7386, "step": 22916 }, { "epoch": 0.8094697755740897, "grad_norm": 1.7684067487716675, "learning_rate": 9.221542799084382e-07, "loss": 0.7565, "step": 22917 }, { "epoch": 0.8095050973777976, "grad_norm": 1.8911069631576538, "learning_rate": 9.218233075677224e-07, "loss": 0.753, "step": 22918 }, { "epoch": 0.8095404191815055, "grad_norm": 1.729095697402954, "learning_rate": 9.214923886014166e-07, "loss": 0.7669, "step": 22919 }, { "epoch": 0.8095757409852135, "grad_norm": 1.9443174600601196, "learning_rate": 9.211615230138526e-07, "loss": 0.7384, "step": 22920 }, { "epoch": 0.8096110627889214, "grad_norm": 1.9113802909851074, "learning_rate": 9.208307108093589e-07, "loss": 0.7904, "step": 22921 }, { "epoch": 0.8096463845926293, "grad_norm": 1.6991301774978638, "learning_rate": 9.204999519922658e-07, "loss": 0.7255, "step": 22922 }, { "epoch": 0.8096817063963371, "grad_norm": 1.8087162971496582, "learning_rate": 9.201692465669037e-07, "loss": 0.7625, "step": 22923 }, { "epoch": 0.809717028200045, "grad_norm": 1.6838291883468628, "learning_rate": 9.198385945375987e-07, "loss": 0.755, "step": 22924 }, { "epoch": 0.8097523500037529, "grad_norm": 1.6924567222595215, "learning_rate": 9.195079959086789e-07, "loss": 0.7357, "step": 22925 }, { "epoch": 0.8097876718074608, "grad_norm": 1.802975058555603, "learning_rate": 9.191774506844731e-07, "loss": 0.7457, "step": 22926 }, { "epoch": 0.8098229936111687, "grad_norm": 2.0474934577941895, "learning_rate": 9.188469588693039e-07, "loss": 0.7442, "step": 22927 }, { "epoch": 0.8098583154148766, "grad_norm": 1.6054227352142334, "learning_rate": 9.185165204675001e-07, "loss": 0.7439, "step": 22928 }, { "epoch": 0.8098936372185845, "grad_norm": 1.836897611618042, "learning_rate": 9.181861354833832e-07, "loss": 0.8013, "step": 22929 }, { "epoch": 0.8099289590222924, "grad_norm": 1.6780840158462524, "learning_rate": 9.178558039212809e-07, "loss": 0.7587, "step": 22930 }, { "epoch": 0.8099642808260004, "grad_norm": 1.5974353551864624, "learning_rate": 9.175255257855126e-07, "loss": 0.7721, "step": 22931 }, { "epoch": 0.8099996026297083, "grad_norm": 1.7516192197799683, "learning_rate": 9.17195301080403e-07, "loss": 0.77, "step": 22932 }, { "epoch": 0.8100349244334162, "grad_norm": 2.928985357284546, "learning_rate": 9.168651298102749e-07, "loss": 0.7583, "step": 22933 }, { "epoch": 0.8100702462371241, "grad_norm": 1.9461963176727295, "learning_rate": 9.165350119794475e-07, "loss": 0.7432, "step": 22934 }, { "epoch": 0.810105568040832, "grad_norm": 1.954648733139038, "learning_rate": 9.162049475922419e-07, "loss": 0.7826, "step": 22935 }, { "epoch": 0.8101408898445399, "grad_norm": 1.592960000038147, "learning_rate": 9.158749366529795e-07, "loss": 0.7476, "step": 22936 }, { "epoch": 0.8101762116482478, "grad_norm": 1.689415693283081, "learning_rate": 9.15544979165977e-07, "loss": 0.7609, "step": 22937 }, { "epoch": 0.8102115334519557, "grad_norm": 1.5245487689971924, "learning_rate": 9.152150751355543e-07, "loss": 0.7512, "step": 22938 }, { "epoch": 0.8102468552556636, "grad_norm": 3.7858102321624756, "learning_rate": 9.148852245660289e-07, "loss": 0.7576, "step": 22939 }, { "epoch": 0.8102821770593716, "grad_norm": 1.0172181129455566, "learning_rate": 9.145554274617186e-07, "loss": 0.5904, "step": 22940 }, { "epoch": 0.8103174988630795, "grad_norm": 1.713021993637085, "learning_rate": 9.142256838269381e-07, "loss": 0.7835, "step": 22941 }, { "epoch": 0.8103528206667874, "grad_norm": 1.871084213256836, "learning_rate": 9.13895993666003e-07, "loss": 0.786, "step": 22942 }, { "epoch": 0.8103881424704953, "grad_norm": 1.9098973274230957, "learning_rate": 9.135663569832309e-07, "loss": 0.7526, "step": 22943 }, { "epoch": 0.8104234642742032, "grad_norm": 1.686745524406433, "learning_rate": 9.132367737829329e-07, "loss": 0.7992, "step": 22944 }, { "epoch": 0.8104587860779111, "grad_norm": 1.6553813219070435, "learning_rate": 9.129072440694237e-07, "loss": 0.7684, "step": 22945 }, { "epoch": 0.810494107881619, "grad_norm": 1.6819961071014404, "learning_rate": 9.125777678470177e-07, "loss": 0.7739, "step": 22946 }, { "epoch": 0.8105294296853269, "grad_norm": 1.640254259109497, "learning_rate": 9.122483451200254e-07, "loss": 0.767, "step": 22947 }, { "epoch": 0.8105647514890348, "grad_norm": 1.696190357208252, "learning_rate": 9.119189758927565e-07, "loss": 0.7704, "step": 22948 }, { "epoch": 0.8106000732927426, "grad_norm": 1.758808970451355, "learning_rate": 9.115896601695246e-07, "loss": 0.7733, "step": 22949 }, { "epoch": 0.8106353950964506, "grad_norm": 1.7034164667129517, "learning_rate": 9.112603979546391e-07, "loss": 0.7786, "step": 22950 }, { "epoch": 0.8106707169001585, "grad_norm": 1.855471134185791, "learning_rate": 9.109311892524081e-07, "loss": 0.8107, "step": 22951 }, { "epoch": 0.8107060387038664, "grad_norm": 1.7687811851501465, "learning_rate": 9.106020340671407e-07, "loss": 0.733, "step": 22952 }, { "epoch": 0.8107413605075743, "grad_norm": 1.6469835042953491, "learning_rate": 9.102729324031468e-07, "loss": 0.8003, "step": 22953 }, { "epoch": 0.8107766823112822, "grad_norm": 1.5497740507125854, "learning_rate": 9.099438842647301e-07, "loss": 0.7633, "step": 22954 }, { "epoch": 0.8108120041149901, "grad_norm": 1.6975698471069336, "learning_rate": 9.096148896561996e-07, "loss": 0.7633, "step": 22955 }, { "epoch": 0.810847325918698, "grad_norm": 1.6959104537963867, "learning_rate": 9.092859485818617e-07, "loss": 0.7947, "step": 22956 }, { "epoch": 0.8108826477224059, "grad_norm": 1.7202495336532593, "learning_rate": 9.089570610460185e-07, "loss": 0.767, "step": 22957 }, { "epoch": 0.8109179695261138, "grad_norm": 1.6747921705245972, "learning_rate": 9.086282270529772e-07, "loss": 0.7529, "step": 22958 }, { "epoch": 0.8109532913298217, "grad_norm": 1.6788641214370728, "learning_rate": 9.08299446607041e-07, "loss": 0.7478, "step": 22959 }, { "epoch": 0.8109886131335297, "grad_norm": 1.6225502490997314, "learning_rate": 9.079707197125115e-07, "loss": 0.7393, "step": 22960 }, { "epoch": 0.8110239349372376, "grad_norm": 1.8292996883392334, "learning_rate": 9.076420463736924e-07, "loss": 0.7809, "step": 22961 }, { "epoch": 0.8110592567409455, "grad_norm": 1.7498352527618408, "learning_rate": 9.073134265948857e-07, "loss": 0.7549, "step": 22962 }, { "epoch": 0.8110945785446534, "grad_norm": 1.698754906654358, "learning_rate": 9.069848603803904e-07, "loss": 0.732, "step": 22963 }, { "epoch": 0.8111299003483613, "grad_norm": 1.7355504035949707, "learning_rate": 9.066563477345081e-07, "loss": 0.7696, "step": 22964 }, { "epoch": 0.8111652221520692, "grad_norm": 1.604970097541809, "learning_rate": 9.063278886615389e-07, "loss": 0.7511, "step": 22965 }, { "epoch": 0.8112005439557771, "grad_norm": 1.8334404230117798, "learning_rate": 9.059994831657809e-07, "loss": 0.7649, "step": 22966 }, { "epoch": 0.811235865759485, "grad_norm": 1.696213722229004, "learning_rate": 9.056711312515309e-07, "loss": 0.7446, "step": 22967 }, { "epoch": 0.8112711875631929, "grad_norm": 1.7767314910888672, "learning_rate": 9.053428329230874e-07, "loss": 0.7714, "step": 22968 }, { "epoch": 0.8113065093669009, "grad_norm": 1.783672571182251, "learning_rate": 9.050145881847483e-07, "loss": 0.7292, "step": 22969 }, { "epoch": 0.8113418311706088, "grad_norm": 1.544735312461853, "learning_rate": 9.046863970408076e-07, "loss": 0.7784, "step": 22970 }, { "epoch": 0.8113771529743167, "grad_norm": 10.54387378692627, "learning_rate": 9.043582594955614e-07, "loss": 0.7792, "step": 22971 }, { "epoch": 0.8114124747780246, "grad_norm": 1.7573456764221191, "learning_rate": 9.040301755533055e-07, "loss": 0.7464, "step": 22972 }, { "epoch": 0.8114477965817325, "grad_norm": 1.9204877614974976, "learning_rate": 9.037021452183314e-07, "loss": 0.796, "step": 22973 }, { "epoch": 0.8114831183854404, "grad_norm": 1.6060504913330078, "learning_rate": 9.033741684949337e-07, "loss": 0.7666, "step": 22974 }, { "epoch": 0.8115184401891482, "grad_norm": 1.9355181455612183, "learning_rate": 9.030462453874062e-07, "loss": 0.7352, "step": 22975 }, { "epoch": 0.8115537619928561, "grad_norm": 2.368025779724121, "learning_rate": 9.02718375900038e-07, "loss": 0.7539, "step": 22976 }, { "epoch": 0.811589083796564, "grad_norm": 1.7904644012451172, "learning_rate": 9.023905600371213e-07, "loss": 0.8038, "step": 22977 }, { "epoch": 0.8116244056002719, "grad_norm": 1.7959020137786865, "learning_rate": 9.020627978029483e-07, "loss": 0.7886, "step": 22978 }, { "epoch": 0.8116597274039798, "grad_norm": 1.6788241863250732, "learning_rate": 9.017350892018056e-07, "loss": 0.731, "step": 22979 }, { "epoch": 0.8116950492076878, "grad_norm": 1.6266731023788452, "learning_rate": 9.014074342379836e-07, "loss": 0.7625, "step": 22980 }, { "epoch": 0.8117303710113957, "grad_norm": 1.6365200281143188, "learning_rate": 9.010798329157722e-07, "loss": 0.747, "step": 22981 }, { "epoch": 0.8117656928151036, "grad_norm": 1.7865569591522217, "learning_rate": 9.007522852394557e-07, "loss": 0.7543, "step": 22982 }, { "epoch": 0.8118010146188115, "grad_norm": 1.7070051431655884, "learning_rate": 9.004247912133229e-07, "loss": 0.7609, "step": 22983 }, { "epoch": 0.8118363364225194, "grad_norm": 1.6114102602005005, "learning_rate": 9.000973508416611e-07, "loss": 0.7672, "step": 22984 }, { "epoch": 0.8118716582262273, "grad_norm": 1.9171584844589233, "learning_rate": 8.99769964128755e-07, "loss": 0.7668, "step": 22985 }, { "epoch": 0.8119069800299352, "grad_norm": 1.892389178276062, "learning_rate": 8.994426310788867e-07, "loss": 0.7364, "step": 22986 }, { "epoch": 0.8119423018336431, "grad_norm": 3.6536812782287598, "learning_rate": 8.99115351696343e-07, "loss": 0.7672, "step": 22987 }, { "epoch": 0.811977623637351, "grad_norm": 1.7216793298721313, "learning_rate": 8.987881259854075e-07, "loss": 0.7958, "step": 22988 }, { "epoch": 0.812012945441059, "grad_norm": 1.5505765676498413, "learning_rate": 8.984609539503614e-07, "loss": 0.7707, "step": 22989 }, { "epoch": 0.8120482672447669, "grad_norm": 0.9440697431564331, "learning_rate": 8.981338355954867e-07, "loss": 0.5687, "step": 22990 }, { "epoch": 0.8120835890484748, "grad_norm": 1.6072551012039185, "learning_rate": 8.97806770925067e-07, "loss": 0.7702, "step": 22991 }, { "epoch": 0.8121189108521827, "grad_norm": 1.6430495977401733, "learning_rate": 8.974797599433799e-07, "loss": 0.7768, "step": 22992 }, { "epoch": 0.8121542326558906, "grad_norm": 1.5889909267425537, "learning_rate": 8.97152802654706e-07, "loss": 0.77, "step": 22993 }, { "epoch": 0.8121895544595985, "grad_norm": 1.6564736366271973, "learning_rate": 8.968258990633266e-07, "loss": 0.7634, "step": 22994 }, { "epoch": 0.8122248762633064, "grad_norm": 1.5905990600585938, "learning_rate": 8.964990491735171e-07, "loss": 0.7391, "step": 22995 }, { "epoch": 0.8122601980670143, "grad_norm": 1.5441854000091553, "learning_rate": 8.961722529895573e-07, "loss": 0.7479, "step": 22996 }, { "epoch": 0.8122955198707222, "grad_norm": 1.4657498598098755, "learning_rate": 8.958455105157227e-07, "loss": 0.7098, "step": 22997 }, { "epoch": 0.8123308416744301, "grad_norm": 1.639467716217041, "learning_rate": 8.955188217562921e-07, "loss": 0.7361, "step": 22998 }, { "epoch": 0.8123661634781381, "grad_norm": 0.9760090708732605, "learning_rate": 8.951921867155389e-07, "loss": 0.5746, "step": 22999 }, { "epoch": 0.812401485281846, "grad_norm": 1.5951597690582275, "learning_rate": 8.948656053977384e-07, "loss": 0.7493, "step": 23000 }, { "epoch": 0.8124368070855538, "grad_norm": 1.5502511262893677, "learning_rate": 8.945390778071661e-07, "loss": 0.7551, "step": 23001 }, { "epoch": 0.8124721288892617, "grad_norm": 1.9004876613616943, "learning_rate": 8.942126039480947e-07, "loss": 0.7669, "step": 23002 }, { "epoch": 0.8125074506929696, "grad_norm": 1.8664970397949219, "learning_rate": 8.938861838247959e-07, "loss": 0.7584, "step": 23003 }, { "epoch": 0.8125427724966775, "grad_norm": 1.742619514465332, "learning_rate": 8.935598174415444e-07, "loss": 0.7462, "step": 23004 }, { "epoch": 0.8125780943003854, "grad_norm": 1.770073652267456, "learning_rate": 8.932335048026086e-07, "loss": 0.7509, "step": 23005 }, { "epoch": 0.8126134161040933, "grad_norm": 1.8110750913619995, "learning_rate": 8.929072459122606e-07, "loss": 0.791, "step": 23006 }, { "epoch": 0.8126487379078012, "grad_norm": 1.8128098249435425, "learning_rate": 8.925810407747703e-07, "loss": 0.7958, "step": 23007 }, { "epoch": 0.8126840597115091, "grad_norm": 1.647251009941101, "learning_rate": 8.922548893944089e-07, "loss": 0.7564, "step": 23008 }, { "epoch": 0.812719381515217, "grad_norm": 2.34808611869812, "learning_rate": 8.919287917754415e-07, "loss": 0.7446, "step": 23009 }, { "epoch": 0.812754703318925, "grad_norm": 1.823665976524353, "learning_rate": 8.916027479221384e-07, "loss": 0.7815, "step": 23010 }, { "epoch": 0.8127900251226329, "grad_norm": 1.7502799034118652, "learning_rate": 8.912767578387671e-07, "loss": 0.8132, "step": 23011 }, { "epoch": 0.8128253469263408, "grad_norm": 1.8078383207321167, "learning_rate": 8.909508215295926e-07, "loss": 0.7336, "step": 23012 }, { "epoch": 0.8128606687300487, "grad_norm": 1.8744364976882935, "learning_rate": 8.906249389988808e-07, "loss": 0.796, "step": 23013 }, { "epoch": 0.8128959905337566, "grad_norm": 1.8847191333770752, "learning_rate": 8.902991102508984e-07, "loss": 0.7721, "step": 23014 }, { "epoch": 0.8129313123374645, "grad_norm": 1.721734881401062, "learning_rate": 8.899733352899081e-07, "loss": 0.7699, "step": 23015 }, { "epoch": 0.8129666341411724, "grad_norm": 1.72636079788208, "learning_rate": 8.896476141201737e-07, "loss": 0.7919, "step": 23016 }, { "epoch": 0.8130019559448803, "grad_norm": 1.6886398792266846, "learning_rate": 8.893219467459602e-07, "loss": 0.7916, "step": 23017 }, { "epoch": 0.8130372777485882, "grad_norm": 1.8083289861679077, "learning_rate": 8.889963331715267e-07, "loss": 0.7857, "step": 23018 }, { "epoch": 0.8130725995522962, "grad_norm": 1.7318956851959229, "learning_rate": 8.886707734011368e-07, "loss": 0.7688, "step": 23019 }, { "epoch": 0.8131079213560041, "grad_norm": 1.7946546077728271, "learning_rate": 8.883452674390519e-07, "loss": 0.7459, "step": 23020 }, { "epoch": 0.813143243159712, "grad_norm": 1.6862543821334839, "learning_rate": 8.880198152895314e-07, "loss": 0.7562, "step": 23021 }, { "epoch": 0.8131785649634199, "grad_norm": 1.5473383665084839, "learning_rate": 8.876944169568336e-07, "loss": 0.7471, "step": 23022 }, { "epoch": 0.8132138867671278, "grad_norm": 1.7155259847640991, "learning_rate": 8.873690724452178e-07, "loss": 0.7444, "step": 23023 }, { "epoch": 0.8132492085708357, "grad_norm": 0.9435980916023254, "learning_rate": 8.870437817589439e-07, "loss": 0.5766, "step": 23024 }, { "epoch": 0.8132845303745436, "grad_norm": 2.408158540725708, "learning_rate": 8.86718544902267e-07, "loss": 0.7637, "step": 23025 }, { "epoch": 0.8133198521782515, "grad_norm": 1.7978651523590088, "learning_rate": 8.86393361879444e-07, "loss": 0.7558, "step": 23026 }, { "epoch": 0.8133551739819593, "grad_norm": 1.6747750043869019, "learning_rate": 8.860682326947329e-07, "loss": 0.7441, "step": 23027 }, { "epoch": 0.8133904957856672, "grad_norm": 1.9175668954849243, "learning_rate": 8.857431573523861e-07, "loss": 0.7708, "step": 23028 }, { "epoch": 0.8134258175893752, "grad_norm": 2.0024397373199463, "learning_rate": 8.854181358566593e-07, "loss": 0.7459, "step": 23029 }, { "epoch": 0.8134611393930831, "grad_norm": 1.8398383855819702, "learning_rate": 8.85093168211808e-07, "loss": 0.7763, "step": 23030 }, { "epoch": 0.813496461196791, "grad_norm": 1.6685130596160889, "learning_rate": 8.847682544220826e-07, "loss": 0.7344, "step": 23031 }, { "epoch": 0.8135317830004989, "grad_norm": 1.6318457126617432, "learning_rate": 8.844433944917363e-07, "loss": 0.7847, "step": 23032 }, { "epoch": 0.8135671048042068, "grad_norm": 2.062669515609741, "learning_rate": 8.84118588425023e-07, "loss": 0.7518, "step": 23033 }, { "epoch": 0.8136024266079147, "grad_norm": 1.729037880897522, "learning_rate": 8.837938362261905e-07, "loss": 0.7748, "step": 23034 }, { "epoch": 0.8136377484116226, "grad_norm": 1.7728122472763062, "learning_rate": 8.834691378994903e-07, "loss": 0.7762, "step": 23035 }, { "epoch": 0.8136730702153305, "grad_norm": 1.6645861864089966, "learning_rate": 8.831444934491734e-07, "loss": 0.7457, "step": 23036 }, { "epoch": 0.8137083920190384, "grad_norm": 1.6704034805297852, "learning_rate": 8.828199028794865e-07, "loss": 0.7821, "step": 23037 }, { "epoch": 0.8137437138227464, "grad_norm": 1.8347244262695312, "learning_rate": 8.824953661946789e-07, "loss": 0.7399, "step": 23038 }, { "epoch": 0.8137790356264543, "grad_norm": 1.6725752353668213, "learning_rate": 8.821708833989989e-07, "loss": 0.7711, "step": 23039 }, { "epoch": 0.8138143574301622, "grad_norm": 1.9444401264190674, "learning_rate": 8.81846454496692e-07, "loss": 0.7488, "step": 23040 }, { "epoch": 0.8138496792338701, "grad_norm": 1.6915621757507324, "learning_rate": 8.815220794920037e-07, "loss": 0.7914, "step": 23041 }, { "epoch": 0.813885001037578, "grad_norm": 1.8941668272018433, "learning_rate": 8.811977583891807e-07, "loss": 0.7532, "step": 23042 }, { "epoch": 0.8139203228412859, "grad_norm": 1.7911492586135864, "learning_rate": 8.808734911924677e-07, "loss": 0.7409, "step": 23043 }, { "epoch": 0.8139556446449938, "grad_norm": 1.6712939739227295, "learning_rate": 8.80549277906107e-07, "loss": 0.784, "step": 23044 }, { "epoch": 0.8139909664487017, "grad_norm": 2.581815719604492, "learning_rate": 8.80225118534343e-07, "loss": 0.7782, "step": 23045 }, { "epoch": 0.8140262882524096, "grad_norm": 1.748957872390747, "learning_rate": 8.799010130814201e-07, "loss": 0.7549, "step": 23046 }, { "epoch": 0.8140616100561175, "grad_norm": 1.6733180284500122, "learning_rate": 8.795769615515765e-07, "loss": 0.7782, "step": 23047 }, { "epoch": 0.8140969318598255, "grad_norm": 1.7581223249435425, "learning_rate": 8.792529639490549e-07, "loss": 0.7561, "step": 23048 }, { "epoch": 0.8141322536635334, "grad_norm": 1.6493737697601318, "learning_rate": 8.78929020278097e-07, "loss": 0.761, "step": 23049 }, { "epoch": 0.8141675754672413, "grad_norm": 1.6844629049301147, "learning_rate": 8.786051305429405e-07, "loss": 0.7413, "step": 23050 }, { "epoch": 0.8142028972709492, "grad_norm": 1.7837469577789307, "learning_rate": 8.782812947478253e-07, "loss": 0.7499, "step": 23051 }, { "epoch": 0.8142382190746571, "grad_norm": 1.6700975894927979, "learning_rate": 8.779575128969908e-07, "loss": 0.8039, "step": 23052 }, { "epoch": 0.8142735408783649, "grad_norm": 1.6191914081573486, "learning_rate": 8.776337849946725e-07, "loss": 0.7478, "step": 23053 }, { "epoch": 0.8143088626820728, "grad_norm": 1.7484207153320312, "learning_rate": 8.773101110451082e-07, "loss": 0.7901, "step": 23054 }, { "epoch": 0.8143441844857807, "grad_norm": 1.6705145835876465, "learning_rate": 8.769864910525344e-07, "loss": 0.7563, "step": 23055 }, { "epoch": 0.8143795062894886, "grad_norm": 1.5934206247329712, "learning_rate": 8.766629250211872e-07, "loss": 0.7786, "step": 23056 }, { "epoch": 0.8144148280931965, "grad_norm": 1.7457183599472046, "learning_rate": 8.763394129552999e-07, "loss": 0.7611, "step": 23057 }, { "epoch": 0.8144501498969045, "grad_norm": 3.516298532485962, "learning_rate": 8.76015954859108e-07, "loss": 0.7987, "step": 23058 }, { "epoch": 0.8144854717006124, "grad_norm": 1.7279188632965088, "learning_rate": 8.756925507368441e-07, "loss": 0.7795, "step": 23059 }, { "epoch": 0.8145207935043203, "grad_norm": 1.7968025207519531, "learning_rate": 8.753692005927395e-07, "loss": 0.7617, "step": 23060 }, { "epoch": 0.8145561153080282, "grad_norm": 1.588070273399353, "learning_rate": 8.750459044310272e-07, "loss": 0.7242, "step": 23061 }, { "epoch": 0.8145914371117361, "grad_norm": 1.7866618633270264, "learning_rate": 8.747226622559401e-07, "loss": 0.7389, "step": 23062 }, { "epoch": 0.814626758915444, "grad_norm": 2.079362154006958, "learning_rate": 8.743994740717066e-07, "loss": 0.7419, "step": 23063 }, { "epoch": 0.8146620807191519, "grad_norm": 1.96442449092865, "learning_rate": 8.740763398825569e-07, "loss": 0.7564, "step": 23064 }, { "epoch": 0.8146974025228598, "grad_norm": 1.6844837665557861, "learning_rate": 8.737532596927207e-07, "loss": 0.7655, "step": 23065 }, { "epoch": 0.8147327243265677, "grad_norm": 1.8469337224960327, "learning_rate": 8.734302335064271e-07, "loss": 0.7804, "step": 23066 }, { "epoch": 0.8147680461302756, "grad_norm": 1.6198691129684448, "learning_rate": 8.731072613279018e-07, "loss": 0.7664, "step": 23067 }, { "epoch": 0.8148033679339836, "grad_norm": 1.707648515701294, "learning_rate": 8.727843431613725e-07, "loss": 0.7445, "step": 23068 }, { "epoch": 0.8148386897376915, "grad_norm": 1.763697624206543, "learning_rate": 8.724614790110675e-07, "loss": 0.7881, "step": 23069 }, { "epoch": 0.8148740115413994, "grad_norm": 1.6852390766143799, "learning_rate": 8.721386688812094e-07, "loss": 0.7884, "step": 23070 }, { "epoch": 0.8149093333451073, "grad_norm": 1.8011724948883057, "learning_rate": 8.718159127760245e-07, "loss": 0.7667, "step": 23071 }, { "epoch": 0.8149446551488152, "grad_norm": 1.9132890701293945, "learning_rate": 8.714932106997387e-07, "loss": 0.7907, "step": 23072 }, { "epoch": 0.8149799769525231, "grad_norm": 2.0762252807617188, "learning_rate": 8.711705626565714e-07, "loss": 0.8122, "step": 23073 }, { "epoch": 0.815015298756231, "grad_norm": 1.6581931114196777, "learning_rate": 8.708479686507488e-07, "loss": 0.7983, "step": 23074 }, { "epoch": 0.8150506205599389, "grad_norm": 0.9066751003265381, "learning_rate": 8.705254286864922e-07, "loss": 0.5721, "step": 23075 }, { "epoch": 0.8150859423636468, "grad_norm": 1.7850046157836914, "learning_rate": 8.702029427680225e-07, "loss": 0.7637, "step": 23076 }, { "epoch": 0.8151212641673548, "grad_norm": 1.8102024793624878, "learning_rate": 8.698805108995595e-07, "loss": 0.8113, "step": 23077 }, { "epoch": 0.8151565859710627, "grad_norm": 1.7890651226043701, "learning_rate": 8.695581330853237e-07, "loss": 0.7824, "step": 23078 }, { "epoch": 0.8151919077747705, "grad_norm": 1.6282063722610474, "learning_rate": 8.692358093295361e-07, "loss": 0.7945, "step": 23079 }, { "epoch": 0.8152272295784784, "grad_norm": 1.9824179410934448, "learning_rate": 8.689135396364123e-07, "loss": 0.7429, "step": 23080 }, { "epoch": 0.8152625513821863, "grad_norm": 1.6366158723831177, "learning_rate": 8.685913240101718e-07, "loss": 0.7637, "step": 23081 }, { "epoch": 0.8152978731858942, "grad_norm": 1.7328531742095947, "learning_rate": 8.682691624550327e-07, "loss": 0.7601, "step": 23082 }, { "epoch": 0.8153331949896021, "grad_norm": 1.9094455242156982, "learning_rate": 8.679470549752089e-07, "loss": 0.771, "step": 23083 }, { "epoch": 0.81536851679331, "grad_norm": 1.7205556631088257, "learning_rate": 8.676250015749172e-07, "loss": 0.7933, "step": 23084 }, { "epoch": 0.8154038385970179, "grad_norm": 1.987224817276001, "learning_rate": 8.673030022583733e-07, "loss": 0.7474, "step": 23085 }, { "epoch": 0.8154391604007258, "grad_norm": 1.7529170513153076, "learning_rate": 8.669810570297904e-07, "loss": 0.7873, "step": 23086 }, { "epoch": 0.8154744822044337, "grad_norm": 1.7875102758407593, "learning_rate": 8.666591658933821e-07, "loss": 0.7644, "step": 23087 }, { "epoch": 0.8155098040081417, "grad_norm": 2.0100152492523193, "learning_rate": 8.663373288533633e-07, "loss": 0.7741, "step": 23088 }, { "epoch": 0.8155451258118496, "grad_norm": 2.248809337615967, "learning_rate": 8.660155459139429e-07, "loss": 0.7996, "step": 23089 }, { "epoch": 0.8155804476155575, "grad_norm": 1.7030469179153442, "learning_rate": 8.656938170793339e-07, "loss": 0.7439, "step": 23090 }, { "epoch": 0.8156157694192654, "grad_norm": 1.8331589698791504, "learning_rate": 8.653721423537487e-07, "loss": 0.7546, "step": 23091 }, { "epoch": 0.8156510912229733, "grad_norm": 1.5743387937545776, "learning_rate": 8.65050521741394e-07, "loss": 0.7397, "step": 23092 }, { "epoch": 0.8156864130266812, "grad_norm": 1.5683836936950684, "learning_rate": 8.64728955246481e-07, "loss": 0.7702, "step": 23093 }, { "epoch": 0.8157217348303891, "grad_norm": 1.8907554149627686, "learning_rate": 8.644074428732191e-07, "loss": 0.7797, "step": 23094 }, { "epoch": 0.815757056634097, "grad_norm": 1.6847035884857178, "learning_rate": 8.640859846258154e-07, "loss": 0.772, "step": 23095 }, { "epoch": 0.8157923784378049, "grad_norm": 2.109724998474121, "learning_rate": 8.637645805084754e-07, "loss": 0.7557, "step": 23096 }, { "epoch": 0.8158277002415129, "grad_norm": 1.756996989250183, "learning_rate": 8.634432305254075e-07, "loss": 0.7587, "step": 23097 }, { "epoch": 0.8158630220452208, "grad_norm": 1.8689846992492676, "learning_rate": 8.631219346808179e-07, "loss": 0.7857, "step": 23098 }, { "epoch": 0.8158983438489287, "grad_norm": 1.7106853723526, "learning_rate": 8.628006929789095e-07, "loss": 0.7195, "step": 23099 }, { "epoch": 0.8159336656526366, "grad_norm": 1.7114468812942505, "learning_rate": 8.624795054238883e-07, "loss": 0.74, "step": 23100 }, { "epoch": 0.8159689874563445, "grad_norm": 1.781991958618164, "learning_rate": 8.621583720199584e-07, "loss": 0.7787, "step": 23101 }, { "epoch": 0.8160043092600524, "grad_norm": 1.7304171323776245, "learning_rate": 8.618372927713209e-07, "loss": 0.7874, "step": 23102 }, { "epoch": 0.8160396310637603, "grad_norm": 1.749515414237976, "learning_rate": 8.615162676821787e-07, "loss": 0.7223, "step": 23103 }, { "epoch": 0.8160749528674682, "grad_norm": 1.7009971141815186, "learning_rate": 8.61195296756735e-07, "loss": 0.7828, "step": 23104 }, { "epoch": 0.8161102746711761, "grad_norm": 1.7547786235809326, "learning_rate": 8.608743799991881e-07, "loss": 0.759, "step": 23105 }, { "epoch": 0.8161455964748839, "grad_norm": 1.5606653690338135, "learning_rate": 8.605535174137392e-07, "loss": 0.7369, "step": 23106 }, { "epoch": 0.8161809182785918, "grad_norm": 1.7665003538131714, "learning_rate": 8.602327090045892e-07, "loss": 0.7423, "step": 23107 }, { "epoch": 0.8162162400822998, "grad_norm": 1.6356884241104126, "learning_rate": 8.599119547759338e-07, "loss": 0.7724, "step": 23108 }, { "epoch": 0.8162515618860077, "grad_norm": 1.6320747137069702, "learning_rate": 8.595912547319729e-07, "loss": 0.7667, "step": 23109 }, { "epoch": 0.8162868836897156, "grad_norm": 1.7279771566390991, "learning_rate": 8.592706088769037e-07, "loss": 0.738, "step": 23110 }, { "epoch": 0.8163222054934235, "grad_norm": 1.594702124595642, "learning_rate": 8.589500172149218e-07, "loss": 0.8152, "step": 23111 }, { "epoch": 0.8163575272971314, "grad_norm": 1.5816534757614136, "learning_rate": 8.586294797502237e-07, "loss": 0.7345, "step": 23112 }, { "epoch": 0.8163928491008393, "grad_norm": 2.1380367279052734, "learning_rate": 8.583089964870056e-07, "loss": 0.7511, "step": 23113 }, { "epoch": 0.8164281709045472, "grad_norm": 2.280690908432007, "learning_rate": 8.579885674294608e-07, "loss": 0.7663, "step": 23114 }, { "epoch": 0.8164634927082551, "grad_norm": 1.528910517692566, "learning_rate": 8.57668192581782e-07, "loss": 0.7291, "step": 23115 }, { "epoch": 0.816498814511963, "grad_norm": 1.5662274360656738, "learning_rate": 8.573478719481626e-07, "loss": 0.755, "step": 23116 }, { "epoch": 0.816534136315671, "grad_norm": 1.5758178234100342, "learning_rate": 8.570276055327975e-07, "loss": 0.7226, "step": 23117 }, { "epoch": 0.8165694581193789, "grad_norm": 1.8825023174285889, "learning_rate": 8.567073933398745e-07, "loss": 0.7533, "step": 23118 }, { "epoch": 0.8166047799230868, "grad_norm": 1.6995383501052856, "learning_rate": 8.563872353735864e-07, "loss": 0.797, "step": 23119 }, { "epoch": 0.8166401017267947, "grad_norm": 1.6041440963745117, "learning_rate": 8.560671316381247e-07, "loss": 0.7441, "step": 23120 }, { "epoch": 0.8166754235305026, "grad_norm": 1.8657631874084473, "learning_rate": 8.557470821376762e-07, "loss": 0.7831, "step": 23121 }, { "epoch": 0.8167107453342105, "grad_norm": 1.72550630569458, "learning_rate": 8.554270868764303e-07, "loss": 0.7624, "step": 23122 }, { "epoch": 0.8167460671379184, "grad_norm": 1.6409249305725098, "learning_rate": 8.551071458585764e-07, "loss": 0.7515, "step": 23123 }, { "epoch": 0.8167813889416263, "grad_norm": 1.7120741605758667, "learning_rate": 8.547872590883016e-07, "loss": 0.7877, "step": 23124 }, { "epoch": 0.8168167107453342, "grad_norm": 1.8145725727081299, "learning_rate": 8.54467426569791e-07, "loss": 0.7422, "step": 23125 }, { "epoch": 0.8168520325490422, "grad_norm": 1.8165398836135864, "learning_rate": 8.541476483072314e-07, "loss": 0.7834, "step": 23126 }, { "epoch": 0.8168873543527501, "grad_norm": 1.5750442743301392, "learning_rate": 8.538279243048092e-07, "loss": 0.7266, "step": 23127 }, { "epoch": 0.816922676156458, "grad_norm": 1.6711374521255493, "learning_rate": 8.535082545667067e-07, "loss": 0.767, "step": 23128 }, { "epoch": 0.8169579979601659, "grad_norm": 1.8473219871520996, "learning_rate": 8.531886390971083e-07, "loss": 0.7862, "step": 23129 }, { "epoch": 0.8169933197638738, "grad_norm": 1.5221316814422607, "learning_rate": 8.528690779001992e-07, "loss": 0.743, "step": 23130 }, { "epoch": 0.8170286415675817, "grad_norm": 1.6235225200653076, "learning_rate": 8.525495709801596e-07, "loss": 0.7338, "step": 23131 }, { "epoch": 0.8170639633712895, "grad_norm": 1.6868021488189697, "learning_rate": 8.522301183411707e-07, "loss": 0.7674, "step": 23132 }, { "epoch": 0.8170992851749974, "grad_norm": 1.6140379905700684, "learning_rate": 8.519107199874143e-07, "loss": 0.7893, "step": 23133 }, { "epoch": 0.8171346069787053, "grad_norm": 1.6017556190490723, "learning_rate": 8.515913759230715e-07, "loss": 0.7402, "step": 23134 }, { "epoch": 0.8171699287824132, "grad_norm": 1.6015158891677856, "learning_rate": 8.512720861523199e-07, "loss": 0.7326, "step": 23135 }, { "epoch": 0.8172052505861211, "grad_norm": 1.791343331336975, "learning_rate": 8.509528506793396e-07, "loss": 0.7627, "step": 23136 }, { "epoch": 0.8172405723898291, "grad_norm": 1.6612035036087036, "learning_rate": 8.506336695083095e-07, "loss": 0.7243, "step": 23137 }, { "epoch": 0.817275894193537, "grad_norm": 1.5548397302627563, "learning_rate": 8.503145426434045e-07, "loss": 0.7395, "step": 23138 }, { "epoch": 0.8173112159972449, "grad_norm": 1.717421293258667, "learning_rate": 8.49995470088803e-07, "loss": 0.7826, "step": 23139 }, { "epoch": 0.8173465378009528, "grad_norm": 1.7180372476577759, "learning_rate": 8.496764518486822e-07, "loss": 0.7825, "step": 23140 }, { "epoch": 0.8173818596046607, "grad_norm": 1.6768889427185059, "learning_rate": 8.493574879272143e-07, "loss": 0.7626, "step": 23141 }, { "epoch": 0.8174171814083686, "grad_norm": 1.5817630290985107, "learning_rate": 8.490385783285748e-07, "loss": 0.7649, "step": 23142 }, { "epoch": 0.8174525032120765, "grad_norm": 1.748558521270752, "learning_rate": 8.4871972305694e-07, "loss": 0.724, "step": 23143 }, { "epoch": 0.8174878250157844, "grad_norm": 1.7850725650787354, "learning_rate": 8.484009221164796e-07, "loss": 0.8073, "step": 23144 }, { "epoch": 0.8175231468194923, "grad_norm": 1.8204259872436523, "learning_rate": 8.480821755113678e-07, "loss": 0.7694, "step": 23145 }, { "epoch": 0.8175584686232003, "grad_norm": 1.7420293092727661, "learning_rate": 8.47763483245777e-07, "loss": 0.7857, "step": 23146 }, { "epoch": 0.8175937904269082, "grad_norm": 1.5407357215881348, "learning_rate": 8.474448453238759e-07, "loss": 0.7467, "step": 23147 }, { "epoch": 0.8176291122306161, "grad_norm": 1.6603773832321167, "learning_rate": 8.471262617498366e-07, "loss": 0.7731, "step": 23148 }, { "epoch": 0.817664434034324, "grad_norm": 2.0514910221099854, "learning_rate": 8.46807732527829e-07, "loss": 0.7443, "step": 23149 }, { "epoch": 0.8176997558380319, "grad_norm": 1.6869356632232666, "learning_rate": 8.464892576620209e-07, "loss": 0.7765, "step": 23150 }, { "epoch": 0.8177350776417398, "grad_norm": 1.7033475637435913, "learning_rate": 8.461708371565797e-07, "loss": 0.7458, "step": 23151 }, { "epoch": 0.8177703994454477, "grad_norm": 1.8075268268585205, "learning_rate": 8.458524710156735e-07, "loss": 0.7729, "step": 23152 }, { "epoch": 0.8178057212491556, "grad_norm": 1.7966150045394897, "learning_rate": 8.455341592434701e-07, "loss": 0.7609, "step": 23153 }, { "epoch": 0.8178410430528635, "grad_norm": 1.792679786682129, "learning_rate": 8.452159018441341e-07, "loss": 0.7992, "step": 23154 }, { "epoch": 0.8178763648565714, "grad_norm": 1.7242512702941895, "learning_rate": 8.44897698821831e-07, "loss": 0.7727, "step": 23155 }, { "epoch": 0.8179116866602794, "grad_norm": 1.7104129791259766, "learning_rate": 8.445795501807275e-07, "loss": 0.7569, "step": 23156 }, { "epoch": 0.8179470084639873, "grad_norm": 1.7269718647003174, "learning_rate": 8.44261455924984e-07, "loss": 0.7296, "step": 23157 }, { "epoch": 0.8179823302676951, "grad_norm": 1.7149438858032227, "learning_rate": 8.439434160587656e-07, "loss": 0.7436, "step": 23158 }, { "epoch": 0.818017652071403, "grad_norm": 1.6197266578674316, "learning_rate": 8.436254305862357e-07, "loss": 0.7626, "step": 23159 }, { "epoch": 0.8180529738751109, "grad_norm": 1.6054351329803467, "learning_rate": 8.433074995115536e-07, "loss": 0.7915, "step": 23160 }, { "epoch": 0.8180882956788188, "grad_norm": 1.8200687170028687, "learning_rate": 8.429896228388812e-07, "loss": 0.7764, "step": 23161 }, { "epoch": 0.8181236174825267, "grad_norm": 1.9923492670059204, "learning_rate": 8.426718005723811e-07, "loss": 0.7532, "step": 23162 }, { "epoch": 0.8181589392862346, "grad_norm": 1.5705903768539429, "learning_rate": 8.423540327162094e-07, "loss": 0.7432, "step": 23163 }, { "epoch": 0.8181942610899425, "grad_norm": 2.091585397720337, "learning_rate": 8.420363192745262e-07, "loss": 0.7804, "step": 23164 }, { "epoch": 0.8182295828936504, "grad_norm": 1.7491188049316406, "learning_rate": 8.417186602514921e-07, "loss": 0.7627, "step": 23165 }, { "epoch": 0.8182649046973584, "grad_norm": 2.092242479324341, "learning_rate": 8.414010556512609e-07, "loss": 0.8057, "step": 23166 }, { "epoch": 0.8183002265010663, "grad_norm": 1.9616440534591675, "learning_rate": 8.410835054779909e-07, "loss": 0.7954, "step": 23167 }, { "epoch": 0.8183355483047742, "grad_norm": 1.7536896467208862, "learning_rate": 8.407660097358389e-07, "loss": 0.7448, "step": 23168 }, { "epoch": 0.8183708701084821, "grad_norm": 1.6717123985290527, "learning_rate": 8.404485684289604e-07, "loss": 0.7753, "step": 23169 }, { "epoch": 0.81840619191219, "grad_norm": 1.7660694122314453, "learning_rate": 8.401311815615071e-07, "loss": 0.758, "step": 23170 }, { "epoch": 0.8184415137158979, "grad_norm": 1.7270667552947998, "learning_rate": 8.398138491376356e-07, "loss": 0.8145, "step": 23171 }, { "epoch": 0.8184768355196058, "grad_norm": 1.8812888860702515, "learning_rate": 8.394965711614995e-07, "loss": 0.7769, "step": 23172 }, { "epoch": 0.8185121573233137, "grad_norm": 1.6943796873092651, "learning_rate": 8.391793476372489e-07, "loss": 0.8063, "step": 23173 }, { "epoch": 0.8185474791270216, "grad_norm": 1.6478945016860962, "learning_rate": 8.388621785690365e-07, "loss": 0.7691, "step": 23174 }, { "epoch": 0.8185828009307295, "grad_norm": 1.7117334604263306, "learning_rate": 8.385450639610154e-07, "loss": 0.7586, "step": 23175 }, { "epoch": 0.8186181227344375, "grad_norm": 1.7741461992263794, "learning_rate": 8.382280038173329e-07, "loss": 0.7518, "step": 23176 }, { "epoch": 0.8186534445381454, "grad_norm": 1.554874062538147, "learning_rate": 8.379109981421396e-07, "loss": 0.7377, "step": 23177 }, { "epoch": 0.8186887663418533, "grad_norm": 1.9040660858154297, "learning_rate": 8.375940469395866e-07, "loss": 0.7922, "step": 23178 }, { "epoch": 0.8187240881455612, "grad_norm": 1.6309319734573364, "learning_rate": 8.372771502138182e-07, "loss": 0.7738, "step": 23179 }, { "epoch": 0.8187594099492691, "grad_norm": 1.7136249542236328, "learning_rate": 8.369603079689847e-07, "loss": 0.7335, "step": 23180 }, { "epoch": 0.818794731752977, "grad_norm": 1.6162890195846558, "learning_rate": 8.366435202092321e-07, "loss": 0.8011, "step": 23181 }, { "epoch": 0.8188300535566849, "grad_norm": 1.6724903583526611, "learning_rate": 8.36326786938707e-07, "loss": 0.7593, "step": 23182 }, { "epoch": 0.8188653753603928, "grad_norm": 1.6451643705368042, "learning_rate": 8.36010108161554e-07, "loss": 0.7589, "step": 23183 }, { "epoch": 0.8189006971641006, "grad_norm": 4.176749229431152, "learning_rate": 8.356934838819175e-07, "loss": 0.7629, "step": 23184 }, { "epoch": 0.8189360189678085, "grad_norm": 1.6042591333389282, "learning_rate": 8.353769141039431e-07, "loss": 0.7747, "step": 23185 }, { "epoch": 0.8189713407715165, "grad_norm": 1.6833112239837646, "learning_rate": 8.350603988317713e-07, "loss": 0.7464, "step": 23186 }, { "epoch": 0.8190066625752244, "grad_norm": 1.6995103359222412, "learning_rate": 8.347439380695477e-07, "loss": 0.7976, "step": 23187 }, { "epoch": 0.8190419843789323, "grad_norm": 1.7288167476654053, "learning_rate": 8.344275318214118e-07, "loss": 0.7516, "step": 23188 }, { "epoch": 0.8190773061826402, "grad_norm": 1.7936208248138428, "learning_rate": 8.341111800915047e-07, "loss": 0.7539, "step": 23189 }, { "epoch": 0.8191126279863481, "grad_norm": 1.7283003330230713, "learning_rate": 8.337948828839676e-07, "loss": 0.7797, "step": 23190 }, { "epoch": 0.819147949790056, "grad_norm": 1.6202658414840698, "learning_rate": 8.334786402029393e-07, "loss": 0.7372, "step": 23191 }, { "epoch": 0.8191832715937639, "grad_norm": 1.660502314567566, "learning_rate": 8.331624520525611e-07, "loss": 0.7401, "step": 23192 }, { "epoch": 0.8192185933974718, "grad_norm": 1.7796859741210938, "learning_rate": 8.328463184369679e-07, "loss": 0.7784, "step": 23193 }, { "epoch": 0.8192539152011797, "grad_norm": 1.6883881092071533, "learning_rate": 8.325302393602986e-07, "loss": 0.7336, "step": 23194 }, { "epoch": 0.8192892370048876, "grad_norm": 1.6091029644012451, "learning_rate": 8.322142148266915e-07, "loss": 0.8011, "step": 23195 }, { "epoch": 0.8193245588085956, "grad_norm": 1.6888800859451294, "learning_rate": 8.318982448402801e-07, "loss": 0.7664, "step": 23196 }, { "epoch": 0.8193598806123035, "grad_norm": 1.6552034616470337, "learning_rate": 8.315823294052012e-07, "loss": 0.7108, "step": 23197 }, { "epoch": 0.8193952024160114, "grad_norm": 1.9386816024780273, "learning_rate": 8.3126646852559e-07, "loss": 0.7269, "step": 23198 }, { "epoch": 0.8194305242197193, "grad_norm": 1.6467912197113037, "learning_rate": 8.309506622055785e-07, "loss": 0.761, "step": 23199 }, { "epoch": 0.8194658460234272, "grad_norm": 1.9527466297149658, "learning_rate": 8.306349104493012e-07, "loss": 0.7641, "step": 23200 }, { "epoch": 0.8195011678271351, "grad_norm": 1.690709114074707, "learning_rate": 8.303192132608917e-07, "loss": 0.7653, "step": 23201 }, { "epoch": 0.819536489630843, "grad_norm": 1.7190697193145752, "learning_rate": 8.300035706444792e-07, "loss": 0.7817, "step": 23202 }, { "epoch": 0.8195718114345509, "grad_norm": 1.6560758352279663, "learning_rate": 8.296879826041959e-07, "loss": 0.7751, "step": 23203 }, { "epoch": 0.8196071332382588, "grad_norm": 1.5861328840255737, "learning_rate": 8.293724491441741e-07, "loss": 0.7618, "step": 23204 }, { "epoch": 0.8196424550419668, "grad_norm": 1.7738112211227417, "learning_rate": 8.290569702685408e-07, "loss": 0.7557, "step": 23205 }, { "epoch": 0.8196777768456747, "grad_norm": 1.8256090879440308, "learning_rate": 8.287415459814252e-07, "loss": 0.7841, "step": 23206 }, { "epoch": 0.8197130986493826, "grad_norm": 1.699227213859558, "learning_rate": 8.284261762869561e-07, "loss": 0.7568, "step": 23207 }, { "epoch": 0.8197484204530905, "grad_norm": 1.9532020092010498, "learning_rate": 8.281108611892624e-07, "loss": 0.7898, "step": 23208 }, { "epoch": 0.8197837422567984, "grad_norm": 1.70884370803833, "learning_rate": 8.277956006924681e-07, "loss": 0.7704, "step": 23209 }, { "epoch": 0.8198190640605062, "grad_norm": 1.6597102880477905, "learning_rate": 8.274803948007009e-07, "loss": 0.7761, "step": 23210 }, { "epoch": 0.8198543858642141, "grad_norm": 1.6903501749038696, "learning_rate": 8.271652435180871e-07, "loss": 0.7688, "step": 23211 }, { "epoch": 0.819889707667922, "grad_norm": 1.7054322957992554, "learning_rate": 8.268501468487489e-07, "loss": 0.7734, "step": 23212 }, { "epoch": 0.8199250294716299, "grad_norm": 1.6772609949111938, "learning_rate": 8.265351047968112e-07, "loss": 0.7634, "step": 23213 }, { "epoch": 0.8199603512753378, "grad_norm": 1.5498703718185425, "learning_rate": 8.262201173663992e-07, "loss": 0.7459, "step": 23214 }, { "epoch": 0.8199956730790458, "grad_norm": 1.667168378829956, "learning_rate": 8.259051845616323e-07, "loss": 0.7458, "step": 23215 }, { "epoch": 0.8200309948827537, "grad_norm": 1.6081401109695435, "learning_rate": 8.255903063866339e-07, "loss": 0.7511, "step": 23216 }, { "epoch": 0.8200663166864616, "grad_norm": 1.5898548364639282, "learning_rate": 8.252754828455256e-07, "loss": 0.7639, "step": 23217 }, { "epoch": 0.8201016384901695, "grad_norm": 1.874022364616394, "learning_rate": 8.249607139424259e-07, "loss": 0.7525, "step": 23218 }, { "epoch": 0.8201369602938774, "grad_norm": 1.7042970657348633, "learning_rate": 8.24645999681456e-07, "loss": 0.7644, "step": 23219 }, { "epoch": 0.8201722820975853, "grad_norm": 1.921128273010254, "learning_rate": 8.243313400667353e-07, "loss": 0.7913, "step": 23220 }, { "epoch": 0.8202076039012932, "grad_norm": 1.5764628648757935, "learning_rate": 8.240167351023798e-07, "loss": 0.7941, "step": 23221 }, { "epoch": 0.8202429257050011, "grad_norm": 1.7852637767791748, "learning_rate": 8.237021847925086e-07, "loss": 0.7664, "step": 23222 }, { "epoch": 0.820278247508709, "grad_norm": 1.67039954662323, "learning_rate": 8.23387689141239e-07, "loss": 0.765, "step": 23223 }, { "epoch": 0.820313569312417, "grad_norm": 1.5668950080871582, "learning_rate": 8.230732481526865e-07, "loss": 0.7452, "step": 23224 }, { "epoch": 0.8203488911161249, "grad_norm": 1.7561631202697754, "learning_rate": 8.227588618309646e-07, "loss": 0.7602, "step": 23225 }, { "epoch": 0.8203842129198328, "grad_norm": 2.4373042583465576, "learning_rate": 8.224445301801898e-07, "loss": 0.7818, "step": 23226 }, { "epoch": 0.8204195347235407, "grad_norm": 1.6224384307861328, "learning_rate": 8.221302532044767e-07, "loss": 0.7563, "step": 23227 }, { "epoch": 0.8204548565272486, "grad_norm": 1.5783326625823975, "learning_rate": 8.218160309079365e-07, "loss": 0.736, "step": 23228 }, { "epoch": 0.8204901783309565, "grad_norm": 1.9723336696624756, "learning_rate": 8.215018632946825e-07, "loss": 0.7639, "step": 23229 }, { "epoch": 0.8205255001346644, "grad_norm": 1.6448297500610352, "learning_rate": 8.211877503688276e-07, "loss": 0.7683, "step": 23230 }, { "epoch": 0.8205608219383723, "grad_norm": 1.489548683166504, "learning_rate": 8.208736921344812e-07, "loss": 0.75, "step": 23231 }, { "epoch": 0.8205961437420802, "grad_norm": 1.5740305185317993, "learning_rate": 8.20559688595754e-07, "loss": 0.7719, "step": 23232 }, { "epoch": 0.8206314655457881, "grad_norm": 2.0023210048675537, "learning_rate": 8.20245739756757e-07, "loss": 0.8079, "step": 23233 }, { "epoch": 0.820666787349496, "grad_norm": 1.5867078304290771, "learning_rate": 8.199318456215971e-07, "loss": 0.751, "step": 23234 }, { "epoch": 0.820702109153204, "grad_norm": 1.5615788698196411, "learning_rate": 8.196180061943837e-07, "loss": 0.7453, "step": 23235 }, { "epoch": 0.8207374309569118, "grad_norm": 2.1526052951812744, "learning_rate": 8.193042214792252e-07, "loss": 0.773, "step": 23236 }, { "epoch": 0.8207727527606197, "grad_norm": 1.6349444389343262, "learning_rate": 8.189904914802255e-07, "loss": 0.7454, "step": 23237 }, { "epoch": 0.8208080745643276, "grad_norm": 1.5911246538162231, "learning_rate": 8.186768162014924e-07, "loss": 0.7672, "step": 23238 }, { "epoch": 0.8208433963680355, "grad_norm": 1.961572289466858, "learning_rate": 8.18363195647131e-07, "loss": 0.772, "step": 23239 }, { "epoch": 0.8208787181717434, "grad_norm": 1.7606045007705688, "learning_rate": 8.180496298212475e-07, "loss": 0.7793, "step": 23240 }, { "epoch": 0.8209140399754513, "grad_norm": 1.83799409866333, "learning_rate": 8.177361187279431e-07, "loss": 0.7531, "step": 23241 }, { "epoch": 0.8209493617791592, "grad_norm": 1.7890437841415405, "learning_rate": 8.174226623713238e-07, "loss": 0.7679, "step": 23242 }, { "epoch": 0.8209846835828671, "grad_norm": 0.9295334815979004, "learning_rate": 8.171092607554898e-07, "loss": 0.5586, "step": 23243 }, { "epoch": 0.821020005386575, "grad_norm": 1.7073554992675781, "learning_rate": 8.167959138845427e-07, "loss": 0.7616, "step": 23244 }, { "epoch": 0.821055327190283, "grad_norm": 1.8311995267868042, "learning_rate": 8.164826217625843e-07, "loss": 0.7215, "step": 23245 }, { "epoch": 0.8210906489939909, "grad_norm": 1.7781332731246948, "learning_rate": 8.161693843937158e-07, "loss": 0.7466, "step": 23246 }, { "epoch": 0.8211259707976988, "grad_norm": 1.7598234415054321, "learning_rate": 8.158562017820349e-07, "loss": 0.7932, "step": 23247 }, { "epoch": 0.8211612926014067, "grad_norm": 1.5810317993164062, "learning_rate": 8.155430739316416e-07, "loss": 0.7513, "step": 23248 }, { "epoch": 0.8211966144051146, "grad_norm": 1.7928991317749023, "learning_rate": 8.15230000846634e-07, "loss": 0.7895, "step": 23249 }, { "epoch": 0.8212319362088225, "grad_norm": 1.6622854471206665, "learning_rate": 8.149169825311104e-07, "loss": 0.7628, "step": 23250 }, { "epoch": 0.8212672580125304, "grad_norm": 1.5624533891677856, "learning_rate": 8.146040189891658e-07, "loss": 0.7591, "step": 23251 }, { "epoch": 0.8213025798162383, "grad_norm": 1.6008567810058594, "learning_rate": 8.142911102248968e-07, "loss": 0.7352, "step": 23252 }, { "epoch": 0.8213379016199462, "grad_norm": 1.6890888214111328, "learning_rate": 8.139782562423998e-07, "loss": 0.7581, "step": 23253 }, { "epoch": 0.8213732234236542, "grad_norm": 2.208235263824463, "learning_rate": 8.13665457045768e-07, "loss": 0.7722, "step": 23254 }, { "epoch": 0.8214085452273621, "grad_norm": 1.720116376876831, "learning_rate": 8.133527126390949e-07, "loss": 0.7523, "step": 23255 }, { "epoch": 0.82144386703107, "grad_norm": 1.5338038206100464, "learning_rate": 8.130400230264768e-07, "loss": 0.7516, "step": 23256 }, { "epoch": 0.8214791888347779, "grad_norm": 1.666657567024231, "learning_rate": 8.127273882120018e-07, "loss": 0.7361, "step": 23257 }, { "epoch": 0.8215145106384858, "grad_norm": 1.6160825490951538, "learning_rate": 8.12414808199764e-07, "loss": 0.7537, "step": 23258 }, { "epoch": 0.8215498324421937, "grad_norm": 1.7315049171447754, "learning_rate": 8.121022829938546e-07, "loss": 0.7504, "step": 23259 }, { "epoch": 0.8215851542459016, "grad_norm": 1.8878604173660278, "learning_rate": 8.117898125983625e-07, "loss": 0.7854, "step": 23260 }, { "epoch": 0.8216204760496095, "grad_norm": 1.9307194948196411, "learning_rate": 8.114773970173795e-07, "loss": 0.7645, "step": 23261 }, { "epoch": 0.8216557978533173, "grad_norm": 0.8731529712677002, "learning_rate": 8.111650362549916e-07, "loss": 0.577, "step": 23262 }, { "epoch": 0.8216911196570252, "grad_norm": 1.7218718528747559, "learning_rate": 8.108527303152897e-07, "loss": 0.7916, "step": 23263 }, { "epoch": 0.8217264414607331, "grad_norm": 4.112502574920654, "learning_rate": 8.10540479202358e-07, "loss": 0.753, "step": 23264 }, { "epoch": 0.8217617632644411, "grad_norm": 1.6733275651931763, "learning_rate": 8.102282829202851e-07, "loss": 0.7606, "step": 23265 }, { "epoch": 0.821797085068149, "grad_norm": 2.0764803886413574, "learning_rate": 8.099161414731582e-07, "loss": 0.7909, "step": 23266 }, { "epoch": 0.8218324068718569, "grad_norm": 1.7255566120147705, "learning_rate": 8.096040548650602e-07, "loss": 0.7717, "step": 23267 }, { "epoch": 0.8218677286755648, "grad_norm": 1.6722838878631592, "learning_rate": 8.092920231000767e-07, "loss": 0.7857, "step": 23268 }, { "epoch": 0.8219030504792727, "grad_norm": 8.5782470703125, "learning_rate": 8.089800461822922e-07, "loss": 0.7591, "step": 23269 }, { "epoch": 0.8219383722829806, "grad_norm": 1.5270192623138428, "learning_rate": 8.086681241157879e-07, "loss": 0.7452, "step": 23270 }, { "epoch": 0.8219736940866885, "grad_norm": 1.6882190704345703, "learning_rate": 8.083562569046477e-07, "loss": 0.7596, "step": 23271 }, { "epoch": 0.8220090158903964, "grad_norm": 1.9524327516555786, "learning_rate": 8.080444445529534e-07, "loss": 0.7861, "step": 23272 }, { "epoch": 0.8220443376941043, "grad_norm": 1.619016408920288, "learning_rate": 8.07732687064785e-07, "loss": 0.7764, "step": 23273 }, { "epoch": 0.8220796594978123, "grad_norm": 1.9350636005401611, "learning_rate": 8.074209844442227e-07, "loss": 0.7814, "step": 23274 }, { "epoch": 0.8221149813015202, "grad_norm": 1.7677056789398193, "learning_rate": 8.071093366953475e-07, "loss": 0.7385, "step": 23275 }, { "epoch": 0.8221503031052281, "grad_norm": 1.7096824645996094, "learning_rate": 8.067977438222363e-07, "loss": 0.7551, "step": 23276 }, { "epoch": 0.822185624908936, "grad_norm": 1.5349953174591064, "learning_rate": 8.064862058289674e-07, "loss": 0.7388, "step": 23277 }, { "epoch": 0.8222209467126439, "grad_norm": 1.6767430305480957, "learning_rate": 8.061747227196203e-07, "loss": 0.7506, "step": 23278 }, { "epoch": 0.8222562685163518, "grad_norm": 2.030930995941162, "learning_rate": 8.058632944982702e-07, "loss": 0.7669, "step": 23279 }, { "epoch": 0.8222915903200597, "grad_norm": 1.7075105905532837, "learning_rate": 8.055519211689916e-07, "loss": 0.729, "step": 23280 }, { "epoch": 0.8223269121237676, "grad_norm": 2.156613826751709, "learning_rate": 8.052406027358611e-07, "loss": 0.7462, "step": 23281 }, { "epoch": 0.8223622339274755, "grad_norm": 1.6256015300750732, "learning_rate": 8.049293392029539e-07, "loss": 0.7716, "step": 23282 }, { "epoch": 0.8223975557311834, "grad_norm": 1.8422660827636719, "learning_rate": 8.046181305743422e-07, "loss": 0.7754, "step": 23283 }, { "epoch": 0.8224328775348914, "grad_norm": 2.0296099185943604, "learning_rate": 8.043069768540995e-07, "loss": 0.7492, "step": 23284 }, { "epoch": 0.8224681993385993, "grad_norm": 1.7349269390106201, "learning_rate": 8.039958780462997e-07, "loss": 0.8015, "step": 23285 }, { "epoch": 0.8225035211423072, "grad_norm": 1.6393587589263916, "learning_rate": 8.036848341550119e-07, "loss": 0.7537, "step": 23286 }, { "epoch": 0.8225388429460151, "grad_norm": 1.6377891302108765, "learning_rate": 8.033738451843076e-07, "loss": 0.7311, "step": 23287 }, { "epoch": 0.8225741647497229, "grad_norm": 1.6011582612991333, "learning_rate": 8.030629111382593e-07, "loss": 0.8009, "step": 23288 }, { "epoch": 0.8226094865534308, "grad_norm": 1.6476072072982788, "learning_rate": 8.027520320209337e-07, "loss": 0.7619, "step": 23289 }, { "epoch": 0.8226448083571387, "grad_norm": 1.8054474592208862, "learning_rate": 8.024412078363996e-07, "loss": 0.7839, "step": 23290 }, { "epoch": 0.8226801301608466, "grad_norm": 1.9724253416061401, "learning_rate": 8.021304385887279e-07, "loss": 0.759, "step": 23291 }, { "epoch": 0.8227154519645545, "grad_norm": 2.475559949874878, "learning_rate": 8.018197242819825e-07, "loss": 0.7906, "step": 23292 }, { "epoch": 0.8227507737682624, "grad_norm": 1.7505775690078735, "learning_rate": 8.015090649202318e-07, "loss": 0.7854, "step": 23293 }, { "epoch": 0.8227860955719704, "grad_norm": 1.6894673109054565, "learning_rate": 8.011984605075418e-07, "loss": 0.7634, "step": 23294 }, { "epoch": 0.8228214173756783, "grad_norm": 1.6473960876464844, "learning_rate": 8.008879110479762e-07, "loss": 0.7846, "step": 23295 }, { "epoch": 0.8228567391793862, "grad_norm": 1.7223424911499023, "learning_rate": 8.005774165455998e-07, "loss": 0.7338, "step": 23296 }, { "epoch": 0.8228920609830941, "grad_norm": 1.821913480758667, "learning_rate": 8.002669770044775e-07, "loss": 0.7461, "step": 23297 }, { "epoch": 0.822927382786802, "grad_norm": 1.6013158559799194, "learning_rate": 7.999565924286734e-07, "loss": 0.7562, "step": 23298 }, { "epoch": 0.8229627045905099, "grad_norm": 1.705579400062561, "learning_rate": 7.996462628222456e-07, "loss": 0.7394, "step": 23299 }, { "epoch": 0.8229980263942178, "grad_norm": 1.8478856086730957, "learning_rate": 7.993359881892582e-07, "loss": 0.7826, "step": 23300 }, { "epoch": 0.8230333481979257, "grad_norm": 1.9916352033615112, "learning_rate": 7.990257685337727e-07, "loss": 0.7997, "step": 23301 }, { "epoch": 0.8230686700016336, "grad_norm": 1.810524582862854, "learning_rate": 7.98715603859847e-07, "loss": 0.8168, "step": 23302 }, { "epoch": 0.8231039918053416, "grad_norm": 0.9244961738586426, "learning_rate": 7.984054941715419e-07, "loss": 0.5807, "step": 23303 }, { "epoch": 0.8231393136090495, "grad_norm": 1.9020899534225464, "learning_rate": 7.980954394729168e-07, "loss": 0.8045, "step": 23304 }, { "epoch": 0.8231746354127574, "grad_norm": 1.7443128824234009, "learning_rate": 7.977854397680279e-07, "loss": 0.7172, "step": 23305 }, { "epoch": 0.8232099572164653, "grad_norm": 1.590800404548645, "learning_rate": 7.974754950609326e-07, "loss": 0.7503, "step": 23306 }, { "epoch": 0.8232452790201732, "grad_norm": 1.737619400024414, "learning_rate": 7.971656053556886e-07, "loss": 0.7539, "step": 23307 }, { "epoch": 0.8232806008238811, "grad_norm": 2.0320420265197754, "learning_rate": 7.968557706563523e-07, "loss": 0.7727, "step": 23308 }, { "epoch": 0.823315922627589, "grad_norm": 1.671112298965454, "learning_rate": 7.96545990966976e-07, "loss": 0.7671, "step": 23309 }, { "epoch": 0.8233512444312969, "grad_norm": 1.6686327457427979, "learning_rate": 7.962362662916162e-07, "loss": 0.7937, "step": 23310 }, { "epoch": 0.8233865662350048, "grad_norm": 1.611095666885376, "learning_rate": 7.959265966343266e-07, "loss": 0.7526, "step": 23311 }, { "epoch": 0.8234218880387127, "grad_norm": 1.594779372215271, "learning_rate": 7.956169819991583e-07, "loss": 0.7399, "step": 23312 }, { "epoch": 0.8234572098424207, "grad_norm": 1.6472868919372559, "learning_rate": 7.953074223901647e-07, "loss": 0.7828, "step": 23313 }, { "epoch": 0.8234925316461285, "grad_norm": 1.7333500385284424, "learning_rate": 7.949979178113987e-07, "loss": 0.7674, "step": 23314 }, { "epoch": 0.8235278534498364, "grad_norm": 1.750511646270752, "learning_rate": 7.946884682669076e-07, "loss": 0.7781, "step": 23315 }, { "epoch": 0.8235631752535443, "grad_norm": 1.7483232021331787, "learning_rate": 7.943790737607448e-07, "loss": 0.7675, "step": 23316 }, { "epoch": 0.8235984970572522, "grad_norm": 2.0222933292388916, "learning_rate": 7.940697342969572e-07, "loss": 0.7945, "step": 23317 }, { "epoch": 0.8236338188609601, "grad_norm": 1.7663719654083252, "learning_rate": 7.937604498795954e-07, "loss": 0.7626, "step": 23318 }, { "epoch": 0.823669140664668, "grad_norm": 3.707430124282837, "learning_rate": 7.934512205127049e-07, "loss": 0.8245, "step": 23319 }, { "epoch": 0.8237044624683759, "grad_norm": 1.6846905946731567, "learning_rate": 7.931420462003336e-07, "loss": 0.7661, "step": 23320 }, { "epoch": 0.8237397842720838, "grad_norm": 1.8536807298660278, "learning_rate": 7.928329269465302e-07, "loss": 0.7646, "step": 23321 }, { "epoch": 0.8237751060757917, "grad_norm": 1.7236109972000122, "learning_rate": 7.925238627553372e-07, "loss": 0.812, "step": 23322 }, { "epoch": 0.8238104278794997, "grad_norm": 1.754075288772583, "learning_rate": 7.922148536308011e-07, "loss": 0.8042, "step": 23323 }, { "epoch": 0.8238457496832076, "grad_norm": 1.8016653060913086, "learning_rate": 7.919058995769669e-07, "loss": 0.7431, "step": 23324 }, { "epoch": 0.8238810714869155, "grad_norm": 1.678307294845581, "learning_rate": 7.915970005978763e-07, "loss": 0.7675, "step": 23325 }, { "epoch": 0.8239163932906234, "grad_norm": 1.7747305631637573, "learning_rate": 7.912881566975728e-07, "loss": 0.7427, "step": 23326 }, { "epoch": 0.8239517150943313, "grad_norm": 0.8875282406806946, "learning_rate": 7.909793678801003e-07, "loss": 0.572, "step": 23327 }, { "epoch": 0.8239870368980392, "grad_norm": 1.8027688264846802, "learning_rate": 7.906706341494969e-07, "loss": 0.7851, "step": 23328 }, { "epoch": 0.8240223587017471, "grad_norm": 1.7975879907608032, "learning_rate": 7.903619555098058e-07, "loss": 0.7697, "step": 23329 }, { "epoch": 0.824057680505455, "grad_norm": 1.8441935777664185, "learning_rate": 7.900533319650666e-07, "loss": 0.7554, "step": 23330 }, { "epoch": 0.8240930023091629, "grad_norm": 1.9048815965652466, "learning_rate": 7.897447635193173e-07, "loss": 0.7877, "step": 23331 }, { "epoch": 0.8241283241128708, "grad_norm": 1.6939618587493896, "learning_rate": 7.894362501765967e-07, "loss": 0.7716, "step": 23332 }, { "epoch": 0.8241636459165788, "grad_norm": 1.6322917938232422, "learning_rate": 7.89127791940944e-07, "loss": 0.7688, "step": 23333 }, { "epoch": 0.8241989677202867, "grad_norm": 1.5702776908874512, "learning_rate": 7.888193888163942e-07, "loss": 0.7624, "step": 23334 }, { "epoch": 0.8242342895239946, "grad_norm": 1.640308141708374, "learning_rate": 7.885110408069863e-07, "loss": 0.7736, "step": 23335 }, { "epoch": 0.8242696113277025, "grad_norm": 1.810451865196228, "learning_rate": 7.882027479167526e-07, "loss": 0.7697, "step": 23336 }, { "epoch": 0.8243049331314104, "grad_norm": 2.829742908477783, "learning_rate": 7.87894510149731e-07, "loss": 0.7866, "step": 23337 }, { "epoch": 0.8243402549351183, "grad_norm": 1.6285583972930908, "learning_rate": 7.875863275099526e-07, "loss": 0.7556, "step": 23338 }, { "epoch": 0.8243755767388262, "grad_norm": 1.6799811124801636, "learning_rate": 7.872782000014528e-07, "loss": 0.7389, "step": 23339 }, { "epoch": 0.824410898542534, "grad_norm": 1.6116105318069458, "learning_rate": 7.869701276282654e-07, "loss": 0.759, "step": 23340 }, { "epoch": 0.8244462203462419, "grad_norm": 1.8178558349609375, "learning_rate": 7.866621103944194e-07, "loss": 0.7377, "step": 23341 }, { "epoch": 0.8244815421499498, "grad_norm": 1.785906434059143, "learning_rate": 7.863541483039483e-07, "loss": 0.8076, "step": 23342 }, { "epoch": 0.8245168639536578, "grad_norm": 1.6657862663269043, "learning_rate": 7.860462413608827e-07, "loss": 0.8111, "step": 23343 }, { "epoch": 0.8245521857573657, "grad_norm": 1.9497369527816772, "learning_rate": 7.857383895692505e-07, "loss": 0.757, "step": 23344 }, { "epoch": 0.8245875075610736, "grad_norm": 1.855641484260559, "learning_rate": 7.854305929330824e-07, "loss": 0.7413, "step": 23345 }, { "epoch": 0.8246228293647815, "grad_norm": 1.7643481492996216, "learning_rate": 7.851228514564074e-07, "loss": 0.7587, "step": 23346 }, { "epoch": 0.8246581511684894, "grad_norm": 1.6697275638580322, "learning_rate": 7.848151651432512e-07, "loss": 0.7882, "step": 23347 }, { "epoch": 0.8246934729721973, "grad_norm": 1.7559850215911865, "learning_rate": 7.845075339976415e-07, "loss": 0.7825, "step": 23348 }, { "epoch": 0.8247287947759052, "grad_norm": 2.0173046588897705, "learning_rate": 7.841999580236065e-07, "loss": 0.7563, "step": 23349 }, { "epoch": 0.8247641165796131, "grad_norm": 1.7213923931121826, "learning_rate": 7.838924372251683e-07, "loss": 0.7763, "step": 23350 }, { "epoch": 0.824799438383321, "grad_norm": 1.6758686304092407, "learning_rate": 7.835849716063532e-07, "loss": 0.7614, "step": 23351 }, { "epoch": 0.824834760187029, "grad_norm": 1.6927862167358398, "learning_rate": 7.832775611711868e-07, "loss": 0.7223, "step": 23352 }, { "epoch": 0.8248700819907369, "grad_norm": 1.596627950668335, "learning_rate": 7.82970205923691e-07, "loss": 0.7462, "step": 23353 }, { "epoch": 0.8249054037944448, "grad_norm": 0.9785957336425781, "learning_rate": 7.826629058678875e-07, "loss": 0.5826, "step": 23354 }, { "epoch": 0.8249407255981527, "grad_norm": 1.5371568202972412, "learning_rate": 7.823556610077987e-07, "loss": 0.7472, "step": 23355 }, { "epoch": 0.8249760474018606, "grad_norm": 1.7476356029510498, "learning_rate": 7.82048471347448e-07, "loss": 0.7607, "step": 23356 }, { "epoch": 0.8250113692055685, "grad_norm": 1.6346590518951416, "learning_rate": 7.817413368908522e-07, "loss": 0.7466, "step": 23357 }, { "epoch": 0.8250466910092764, "grad_norm": 1.6941596269607544, "learning_rate": 7.814342576420331e-07, "loss": 0.7397, "step": 23358 }, { "epoch": 0.8250820128129843, "grad_norm": 1.9213191270828247, "learning_rate": 7.811272336050101e-07, "loss": 0.75, "step": 23359 }, { "epoch": 0.8251173346166922, "grad_norm": 1.7305437326431274, "learning_rate": 7.808202647838004e-07, "loss": 0.7536, "step": 23360 }, { "epoch": 0.8251526564204001, "grad_norm": 1.686128854751587, "learning_rate": 7.805133511824215e-07, "loss": 0.7746, "step": 23361 }, { "epoch": 0.825187978224108, "grad_norm": 1.5640666484832764, "learning_rate": 7.802064928048919e-07, "loss": 0.7572, "step": 23362 }, { "epoch": 0.825223300027816, "grad_norm": 1.672305941581726, "learning_rate": 7.798996896552258e-07, "loss": 0.7325, "step": 23363 }, { "epoch": 0.8252586218315239, "grad_norm": 1.7636150121688843, "learning_rate": 7.795929417374387e-07, "loss": 0.7935, "step": 23364 }, { "epoch": 0.8252939436352318, "grad_norm": 1.6136701107025146, "learning_rate": 7.792862490555459e-07, "loss": 0.7798, "step": 23365 }, { "epoch": 0.8253292654389396, "grad_norm": 1.8958566188812256, "learning_rate": 7.789796116135628e-07, "loss": 0.7565, "step": 23366 }, { "epoch": 0.8253645872426475, "grad_norm": 1.7567944526672363, "learning_rate": 7.786730294154993e-07, "loss": 0.7392, "step": 23367 }, { "epoch": 0.8253999090463554, "grad_norm": 2.3125388622283936, "learning_rate": 7.783665024653702e-07, "loss": 0.7593, "step": 23368 }, { "epoch": 0.8254352308500633, "grad_norm": 1.854055643081665, "learning_rate": 7.780600307671876e-07, "loss": 0.7763, "step": 23369 }, { "epoch": 0.8254705526537712, "grad_norm": 1.754905104637146, "learning_rate": 7.777536143249609e-07, "loss": 0.7509, "step": 23370 }, { "epoch": 0.8255058744574791, "grad_norm": 1.7355647087097168, "learning_rate": 7.774472531427018e-07, "loss": 0.7619, "step": 23371 }, { "epoch": 0.825541196261187, "grad_norm": 0.9215748310089111, "learning_rate": 7.771409472244196e-07, "loss": 0.5802, "step": 23372 }, { "epoch": 0.825576518064895, "grad_norm": 1.7339470386505127, "learning_rate": 7.768346965741219e-07, "loss": 0.7565, "step": 23373 }, { "epoch": 0.8256118398686029, "grad_norm": 1.633254885673523, "learning_rate": 7.765285011958173e-07, "loss": 0.786, "step": 23374 }, { "epoch": 0.8256471616723108, "grad_norm": 2.3695549964904785, "learning_rate": 7.762223610935143e-07, "loss": 0.7272, "step": 23375 }, { "epoch": 0.8256824834760187, "grad_norm": 1.6763592958450317, "learning_rate": 7.759162762712203e-07, "loss": 0.7407, "step": 23376 }, { "epoch": 0.8257178052797266, "grad_norm": 1.6119006872177124, "learning_rate": 7.756102467329385e-07, "loss": 0.7211, "step": 23377 }, { "epoch": 0.8257531270834345, "grad_norm": 1.7427937984466553, "learning_rate": 7.753042724826753e-07, "loss": 0.8003, "step": 23378 }, { "epoch": 0.8257884488871424, "grad_norm": 1.8380067348480225, "learning_rate": 7.749983535244376e-07, "loss": 0.7343, "step": 23379 }, { "epoch": 0.8258237706908503, "grad_norm": 1.871217131614685, "learning_rate": 7.746924898622255e-07, "loss": 0.7511, "step": 23380 }, { "epoch": 0.8258590924945582, "grad_norm": 3.912832260131836, "learning_rate": 7.743866815000439e-07, "loss": 0.7503, "step": 23381 }, { "epoch": 0.8258944142982662, "grad_norm": 1.6012624502182007, "learning_rate": 7.740809284418965e-07, "loss": 0.7422, "step": 23382 }, { "epoch": 0.8259297361019741, "grad_norm": 1.6539595127105713, "learning_rate": 7.737752306917823e-07, "loss": 0.7315, "step": 23383 }, { "epoch": 0.825965057905682, "grad_norm": 1.673411250114441, "learning_rate": 7.734695882537035e-07, "loss": 0.7768, "step": 23384 }, { "epoch": 0.8260003797093899, "grad_norm": 1.7881574630737305, "learning_rate": 7.731640011316616e-07, "loss": 0.7411, "step": 23385 }, { "epoch": 0.8260357015130978, "grad_norm": 1.65630042552948, "learning_rate": 7.728584693296531e-07, "loss": 0.7656, "step": 23386 }, { "epoch": 0.8260710233168057, "grad_norm": 2.2025580406188965, "learning_rate": 7.725529928516784e-07, "loss": 0.7396, "step": 23387 }, { "epoch": 0.8261063451205136, "grad_norm": 1.5924084186553955, "learning_rate": 7.722475717017364e-07, "loss": 0.7411, "step": 23388 }, { "epoch": 0.8261416669242215, "grad_norm": 1.8181196451187134, "learning_rate": 7.719422058838227e-07, "loss": 0.7438, "step": 23389 }, { "epoch": 0.8261769887279294, "grad_norm": 1.6924182176589966, "learning_rate": 7.716368954019354e-07, "loss": 0.7542, "step": 23390 }, { "epoch": 0.8262123105316374, "grad_norm": 1.7595750093460083, "learning_rate": 7.71331640260069e-07, "loss": 0.8055, "step": 23391 }, { "epoch": 0.8262476323353452, "grad_norm": 1.614931583404541, "learning_rate": 7.710264404622197e-07, "loss": 0.796, "step": 23392 }, { "epoch": 0.8262829541390531, "grad_norm": 1.6440891027450562, "learning_rate": 7.707212960123805e-07, "loss": 0.7653, "step": 23393 }, { "epoch": 0.826318275942761, "grad_norm": 1.8609727621078491, "learning_rate": 7.70416206914546e-07, "loss": 0.7516, "step": 23394 }, { "epoch": 0.8263535977464689, "grad_norm": 1.6264902353286743, "learning_rate": 7.701111731727107e-07, "loss": 0.753, "step": 23395 }, { "epoch": 0.8263889195501768, "grad_norm": 1.590057611465454, "learning_rate": 7.698061947908636e-07, "loss": 0.7568, "step": 23396 }, { "epoch": 0.8264242413538847, "grad_norm": 1.720248818397522, "learning_rate": 7.695012717729977e-07, "loss": 0.7662, "step": 23397 }, { "epoch": 0.8264595631575926, "grad_norm": 1.5563229322433472, "learning_rate": 7.691964041231054e-07, "loss": 0.7563, "step": 23398 }, { "epoch": 0.8264948849613005, "grad_norm": 1.7411333322525024, "learning_rate": 7.688915918451733e-07, "loss": 0.7796, "step": 23399 }, { "epoch": 0.8265302067650084, "grad_norm": 1.6579453945159912, "learning_rate": 7.685868349431936e-07, "loss": 0.8008, "step": 23400 }, { "epoch": 0.8265655285687163, "grad_norm": 1.9332573413848877, "learning_rate": 7.682821334211548e-07, "loss": 0.7675, "step": 23401 }, { "epoch": 0.8266008503724243, "grad_norm": 1.6341122388839722, "learning_rate": 7.679774872830431e-07, "loss": 0.7853, "step": 23402 }, { "epoch": 0.8266361721761322, "grad_norm": 1.8892353773117065, "learning_rate": 7.676728965328462e-07, "loss": 0.756, "step": 23403 }, { "epoch": 0.8266714939798401, "grad_norm": 1.6524639129638672, "learning_rate": 7.67368361174552e-07, "loss": 0.761, "step": 23404 }, { "epoch": 0.826706815783548, "grad_norm": 1.6237584352493286, "learning_rate": 7.670638812121439e-07, "loss": 0.7415, "step": 23405 }, { "epoch": 0.8267421375872559, "grad_norm": 1.9071894884109497, "learning_rate": 7.667594566496084e-07, "loss": 0.7768, "step": 23406 }, { "epoch": 0.8267774593909638, "grad_norm": 1.7948455810546875, "learning_rate": 7.664550874909299e-07, "loss": 0.748, "step": 23407 }, { "epoch": 0.8268127811946717, "grad_norm": 1.8724427223205566, "learning_rate": 7.66150773740092e-07, "loss": 0.7943, "step": 23408 }, { "epoch": 0.8268481029983796, "grad_norm": 1.764550805091858, "learning_rate": 7.658465154010753e-07, "loss": 0.7735, "step": 23409 }, { "epoch": 0.8268834248020875, "grad_norm": 1.721949577331543, "learning_rate": 7.655423124778633e-07, "loss": 0.7951, "step": 23410 }, { "epoch": 0.8269187466057955, "grad_norm": 1.654232144355774, "learning_rate": 7.652381649744389e-07, "loss": 0.7342, "step": 23411 }, { "epoch": 0.8269540684095034, "grad_norm": 1.492084264755249, "learning_rate": 7.649340728947807e-07, "loss": 0.735, "step": 23412 }, { "epoch": 0.8269893902132113, "grad_norm": 1.7507485151290894, "learning_rate": 7.646300362428688e-07, "loss": 0.7416, "step": 23413 }, { "epoch": 0.8270247120169192, "grad_norm": 1.625198245048523, "learning_rate": 7.643260550226839e-07, "loss": 0.7404, "step": 23414 }, { "epoch": 0.8270600338206271, "grad_norm": 1.6366971731185913, "learning_rate": 7.640221292382022e-07, "loss": 0.75, "step": 23415 }, { "epoch": 0.827095355624335, "grad_norm": 2.8673548698425293, "learning_rate": 7.637182588934028e-07, "loss": 0.7708, "step": 23416 }, { "epoch": 0.8271306774280429, "grad_norm": 1.697284460067749, "learning_rate": 7.634144439922636e-07, "loss": 0.7837, "step": 23417 }, { "epoch": 0.8271659992317507, "grad_norm": 2.1129043102264404, "learning_rate": 7.631106845387586e-07, "loss": 0.7768, "step": 23418 }, { "epoch": 0.8272013210354586, "grad_norm": 1.647610068321228, "learning_rate": 7.628069805368643e-07, "loss": 0.7896, "step": 23419 }, { "epoch": 0.8272366428391665, "grad_norm": 1.7245107889175415, "learning_rate": 7.625033319905573e-07, "loss": 0.7717, "step": 23420 }, { "epoch": 0.8272719646428744, "grad_norm": 1.671493649482727, "learning_rate": 7.621997389038088e-07, "loss": 0.7363, "step": 23421 }, { "epoch": 0.8273072864465824, "grad_norm": 1.723442792892456, "learning_rate": 7.618962012805931e-07, "loss": 0.7817, "step": 23422 }, { "epoch": 0.8273426082502903, "grad_norm": 2.0939552783966064, "learning_rate": 7.61592719124884e-07, "loss": 0.7421, "step": 23423 }, { "epoch": 0.8273779300539982, "grad_norm": 1.9212532043457031, "learning_rate": 7.612892924406535e-07, "loss": 0.7691, "step": 23424 }, { "epoch": 0.8274132518577061, "grad_norm": 1.7049822807312012, "learning_rate": 7.609859212318705e-07, "loss": 0.7571, "step": 23425 }, { "epoch": 0.827448573661414, "grad_norm": 1.7908703088760376, "learning_rate": 7.606826055025068e-07, "loss": 0.794, "step": 23426 }, { "epoch": 0.8274838954651219, "grad_norm": 1.6768158674240112, "learning_rate": 7.60379345256535e-07, "loss": 0.7691, "step": 23427 }, { "epoch": 0.8275192172688298, "grad_norm": 1.5539129972457886, "learning_rate": 7.60076140497919e-07, "loss": 0.7486, "step": 23428 }, { "epoch": 0.8275545390725377, "grad_norm": 1.7690675258636475, "learning_rate": 7.597729912306296e-07, "loss": 0.7894, "step": 23429 }, { "epoch": 0.8275898608762456, "grad_norm": 1.5786195993423462, "learning_rate": 7.59469897458635e-07, "loss": 0.762, "step": 23430 }, { "epoch": 0.8276251826799536, "grad_norm": 1.6663984060287476, "learning_rate": 7.591668591859003e-07, "loss": 0.7702, "step": 23431 }, { "epoch": 0.8276605044836615, "grad_norm": 1.5721861124038696, "learning_rate": 7.588638764163925e-07, "loss": 0.7263, "step": 23432 }, { "epoch": 0.8276958262873694, "grad_norm": 1.6988000869750977, "learning_rate": 7.585609491540768e-07, "loss": 0.7575, "step": 23433 }, { "epoch": 0.8277311480910773, "grad_norm": 1.695480227470398, "learning_rate": 7.582580774029197e-07, "loss": 0.7653, "step": 23434 }, { "epoch": 0.8277664698947852, "grad_norm": 1.5979816913604736, "learning_rate": 7.579552611668822e-07, "loss": 0.7362, "step": 23435 }, { "epoch": 0.8278017916984931, "grad_norm": 1.6268657445907593, "learning_rate": 7.576525004499286e-07, "loss": 0.7414, "step": 23436 }, { "epoch": 0.827837113502201, "grad_norm": 1.5890568494796753, "learning_rate": 7.573497952560233e-07, "loss": 0.7628, "step": 23437 }, { "epoch": 0.8278724353059089, "grad_norm": 1.5727750062942505, "learning_rate": 7.570471455891248e-07, "loss": 0.7485, "step": 23438 }, { "epoch": 0.8279077571096168, "grad_norm": 1.7605903148651123, "learning_rate": 7.567445514531957e-07, "loss": 0.7656, "step": 23439 }, { "epoch": 0.8279430789133247, "grad_norm": 1.683183193206787, "learning_rate": 7.564420128521971e-07, "loss": 0.7585, "step": 23440 }, { "epoch": 0.8279784007170327, "grad_norm": 1.7245697975158691, "learning_rate": 7.56139529790087e-07, "loss": 0.7449, "step": 23441 }, { "epoch": 0.8280137225207406, "grad_norm": 1.5606663227081299, "learning_rate": 7.55837102270825e-07, "loss": 0.7374, "step": 23442 }, { "epoch": 0.8280490443244485, "grad_norm": 1.7057477235794067, "learning_rate": 7.555347302983701e-07, "loss": 0.752, "step": 23443 }, { "epoch": 0.8280843661281563, "grad_norm": 1.621800422668457, "learning_rate": 7.552324138766781e-07, "loss": 0.7611, "step": 23444 }, { "epoch": 0.8281196879318642, "grad_norm": 1.8668675422668457, "learning_rate": 7.54930153009707e-07, "loss": 0.7596, "step": 23445 }, { "epoch": 0.8281550097355721, "grad_norm": 1.7056152820587158, "learning_rate": 7.546279477014112e-07, "loss": 0.7483, "step": 23446 }, { "epoch": 0.82819033153928, "grad_norm": 1.6150891780853271, "learning_rate": 7.543257979557478e-07, "loss": 0.7552, "step": 23447 }, { "epoch": 0.8282256533429879, "grad_norm": 1.7473424673080444, "learning_rate": 7.54023703776669e-07, "loss": 0.8099, "step": 23448 }, { "epoch": 0.8282609751466958, "grad_norm": 1.6994092464447021, "learning_rate": 7.5372166516813e-07, "loss": 0.8174, "step": 23449 }, { "epoch": 0.8282962969504037, "grad_norm": 1.536007285118103, "learning_rate": 7.534196821340845e-07, "loss": 0.7397, "step": 23450 }, { "epoch": 0.8283316187541117, "grad_norm": 1.5463255643844604, "learning_rate": 7.531177546784829e-07, "loss": 0.7091, "step": 23451 }, { "epoch": 0.8283669405578196, "grad_norm": 1.7202104330062866, "learning_rate": 7.528158828052779e-07, "loss": 0.739, "step": 23452 }, { "epoch": 0.8284022623615275, "grad_norm": 2.183901071548462, "learning_rate": 7.525140665184216e-07, "loss": 0.763, "step": 23453 }, { "epoch": 0.8284375841652354, "grad_norm": 1.561859369277954, "learning_rate": 7.522123058218611e-07, "loss": 0.7573, "step": 23454 }, { "epoch": 0.8284729059689433, "grad_norm": 2.1420321464538574, "learning_rate": 7.519106007195476e-07, "loss": 0.7903, "step": 23455 }, { "epoch": 0.8285082277726512, "grad_norm": 1.7880405187606812, "learning_rate": 7.516089512154312e-07, "loss": 0.7846, "step": 23456 }, { "epoch": 0.8285435495763591, "grad_norm": 1.535080075263977, "learning_rate": 7.513073573134566e-07, "loss": 0.7722, "step": 23457 }, { "epoch": 0.828578871380067, "grad_norm": 1.772520899772644, "learning_rate": 7.510058190175729e-07, "loss": 0.7949, "step": 23458 }, { "epoch": 0.8286141931837749, "grad_norm": 1.7121973037719727, "learning_rate": 7.507043363317273e-07, "loss": 0.7892, "step": 23459 }, { "epoch": 0.8286495149874828, "grad_norm": 1.7913448810577393, "learning_rate": 7.504029092598636e-07, "loss": 0.7635, "step": 23460 }, { "epoch": 0.8286848367911908, "grad_norm": 1.7416929006576538, "learning_rate": 7.501015378059273e-07, "loss": 0.7681, "step": 23461 }, { "epoch": 0.8287201585948987, "grad_norm": 1.6669822931289673, "learning_rate": 7.498002219738649e-07, "loss": 0.7535, "step": 23462 }, { "epoch": 0.8287554803986066, "grad_norm": 1.6497530937194824, "learning_rate": 7.494989617676169e-07, "loss": 0.7776, "step": 23463 }, { "epoch": 0.8287908022023145, "grad_norm": 2.506204843521118, "learning_rate": 7.491977571911285e-07, "loss": 0.7456, "step": 23464 }, { "epoch": 0.8288261240060224, "grad_norm": 6.795258045196533, "learning_rate": 7.488966082483401e-07, "loss": 0.7627, "step": 23465 }, { "epoch": 0.8288614458097303, "grad_norm": 1.9107064008712769, "learning_rate": 7.485955149431945e-07, "loss": 0.7589, "step": 23466 }, { "epoch": 0.8288967676134382, "grad_norm": 1.7488274574279785, "learning_rate": 7.482944772796302e-07, "loss": 0.7658, "step": 23467 }, { "epoch": 0.8289320894171461, "grad_norm": 1.7201297283172607, "learning_rate": 7.479934952615892e-07, "loss": 0.7753, "step": 23468 }, { "epoch": 0.828967411220854, "grad_norm": 1.6018378734588623, "learning_rate": 7.476925688930109e-07, "loss": 0.7431, "step": 23469 }, { "epoch": 0.8290027330245618, "grad_norm": 1.7056595087051392, "learning_rate": 7.473916981778317e-07, "loss": 0.7908, "step": 23470 }, { "epoch": 0.8290380548282698, "grad_norm": 1.7588483095169067, "learning_rate": 7.470908831199908e-07, "loss": 0.7505, "step": 23471 }, { "epoch": 0.8290733766319777, "grad_norm": 2.3035056591033936, "learning_rate": 7.46790123723426e-07, "loss": 0.7984, "step": 23472 }, { "epoch": 0.8291086984356856, "grad_norm": 1.6765962839126587, "learning_rate": 7.464894199920714e-07, "loss": 0.7825, "step": 23473 }, { "epoch": 0.8291440202393935, "grad_norm": 1.8352010250091553, "learning_rate": 7.461887719298633e-07, "loss": 0.7442, "step": 23474 }, { "epoch": 0.8291793420431014, "grad_norm": 1.617412805557251, "learning_rate": 7.458881795407386e-07, "loss": 0.7459, "step": 23475 }, { "epoch": 0.8292146638468093, "grad_norm": 7.632914066314697, "learning_rate": 7.455876428286285e-07, "loss": 0.8105, "step": 23476 }, { "epoch": 0.8292499856505172, "grad_norm": 1.5246392488479614, "learning_rate": 7.452871617974672e-07, "loss": 0.742, "step": 23477 }, { "epoch": 0.8292853074542251, "grad_norm": 1.5953696966171265, "learning_rate": 7.449867364511893e-07, "loss": 0.7571, "step": 23478 }, { "epoch": 0.829320629257933, "grad_norm": 1.7204221487045288, "learning_rate": 7.446863667937238e-07, "loss": 0.7873, "step": 23479 }, { "epoch": 0.829355951061641, "grad_norm": 1.8016653060913086, "learning_rate": 7.443860528290031e-07, "loss": 0.7697, "step": 23480 }, { "epoch": 0.8293912728653489, "grad_norm": 1.5269641876220703, "learning_rate": 7.440857945609581e-07, "loss": 0.7403, "step": 23481 }, { "epoch": 0.8294265946690568, "grad_norm": 3.8609941005706787, "learning_rate": 7.4378559199352e-07, "loss": 0.7843, "step": 23482 }, { "epoch": 0.8294619164727647, "grad_norm": 1.731285810470581, "learning_rate": 7.43485445130614e-07, "loss": 0.7739, "step": 23483 }, { "epoch": 0.8294972382764726, "grad_norm": 1.5858540534973145, "learning_rate": 7.431853539761708e-07, "loss": 0.7076, "step": 23484 }, { "epoch": 0.8295325600801805, "grad_norm": 1.557692527770996, "learning_rate": 7.428853185341178e-07, "loss": 0.7702, "step": 23485 }, { "epoch": 0.8295678818838884, "grad_norm": 1.94199538230896, "learning_rate": 7.425853388083809e-07, "loss": 0.7512, "step": 23486 }, { "epoch": 0.8296032036875963, "grad_norm": 1.6000852584838867, "learning_rate": 7.422854148028869e-07, "loss": 0.7465, "step": 23487 }, { "epoch": 0.8296385254913042, "grad_norm": 2.3541083335876465, "learning_rate": 7.419855465215614e-07, "loss": 0.7934, "step": 23488 }, { "epoch": 0.8296738472950121, "grad_norm": 1.677646279335022, "learning_rate": 7.416857339683281e-07, "loss": 0.7774, "step": 23489 }, { "epoch": 0.8297091690987201, "grad_norm": 1.8663297891616821, "learning_rate": 7.413859771471116e-07, "loss": 0.7924, "step": 23490 }, { "epoch": 0.829744490902428, "grad_norm": 1.8557833433151245, "learning_rate": 7.410862760618342e-07, "loss": 0.7734, "step": 23491 }, { "epoch": 0.8297798127061359, "grad_norm": 1.609175682067871, "learning_rate": 7.407866307164207e-07, "loss": 0.7598, "step": 23492 }, { "epoch": 0.8298151345098438, "grad_norm": 1.638718605041504, "learning_rate": 7.404870411147902e-07, "loss": 0.7558, "step": 23493 }, { "epoch": 0.8298504563135517, "grad_norm": 1.8091355562210083, "learning_rate": 7.401875072608645e-07, "loss": 0.7466, "step": 23494 }, { "epoch": 0.8298857781172596, "grad_norm": 2.1613729000091553, "learning_rate": 7.398880291585648e-07, "loss": 0.767, "step": 23495 }, { "epoch": 0.8299210999209674, "grad_norm": 1.8312636613845825, "learning_rate": 7.395886068118086e-07, "loss": 0.7855, "step": 23496 }, { "epoch": 0.8299564217246753, "grad_norm": 1.6456812620162964, "learning_rate": 7.392892402245161e-07, "loss": 0.7612, "step": 23497 }, { "epoch": 0.8299917435283832, "grad_norm": 1.6725142002105713, "learning_rate": 7.389899294006059e-07, "loss": 0.7791, "step": 23498 }, { "epoch": 0.8300270653320911, "grad_norm": 1.7748275995254517, "learning_rate": 7.386906743439937e-07, "loss": 0.7744, "step": 23499 }, { "epoch": 0.830062387135799, "grad_norm": 1.7159197330474854, "learning_rate": 7.383914750585969e-07, "loss": 0.7872, "step": 23500 }, { "epoch": 0.830097708939507, "grad_norm": 1.8013170957565308, "learning_rate": 7.380923315483324e-07, "loss": 0.7826, "step": 23501 }, { "epoch": 0.8301330307432149, "grad_norm": 1.539479374885559, "learning_rate": 7.377932438171143e-07, "loss": 0.7442, "step": 23502 }, { "epoch": 0.8301683525469228, "grad_norm": 1.7600785493850708, "learning_rate": 7.37494211868856e-07, "loss": 0.7833, "step": 23503 }, { "epoch": 0.8302036743506307, "grad_norm": 1.7995409965515137, "learning_rate": 7.371952357074719e-07, "loss": 0.7573, "step": 23504 }, { "epoch": 0.8302389961543386, "grad_norm": 1.5906031131744385, "learning_rate": 7.368963153368769e-07, "loss": 0.7417, "step": 23505 }, { "epoch": 0.8302743179580465, "grad_norm": 1.6343190670013428, "learning_rate": 7.365974507609796e-07, "loss": 0.741, "step": 23506 }, { "epoch": 0.8303096397617544, "grad_norm": 2.63156795501709, "learning_rate": 7.362986419836943e-07, "loss": 0.7577, "step": 23507 }, { "epoch": 0.8303449615654623, "grad_norm": 1.8013097047805786, "learning_rate": 7.359998890089315e-07, "loss": 0.7873, "step": 23508 }, { "epoch": 0.8303802833691702, "grad_norm": 1.6522544622421265, "learning_rate": 7.357011918405998e-07, "loss": 0.7269, "step": 23509 }, { "epoch": 0.8304156051728782, "grad_norm": 1.62260103225708, "learning_rate": 7.35402550482609e-07, "loss": 0.761, "step": 23510 }, { "epoch": 0.8304509269765861, "grad_norm": 1.8675739765167236, "learning_rate": 7.351039649388692e-07, "loss": 0.7705, "step": 23511 }, { "epoch": 0.830486248780294, "grad_norm": 1.6098979711532593, "learning_rate": 7.34805435213286e-07, "loss": 0.7549, "step": 23512 }, { "epoch": 0.8305215705840019, "grad_norm": 1.7827260494232178, "learning_rate": 7.345069613097672e-07, "loss": 0.7306, "step": 23513 }, { "epoch": 0.8305568923877098, "grad_norm": 1.803017258644104, "learning_rate": 7.34208543232221e-07, "loss": 0.7954, "step": 23514 }, { "epoch": 0.8305922141914177, "grad_norm": 1.5708951950073242, "learning_rate": 7.339101809845501e-07, "loss": 0.7631, "step": 23515 }, { "epoch": 0.8306275359951256, "grad_norm": 2.052473545074463, "learning_rate": 7.336118745706605e-07, "loss": 0.7735, "step": 23516 }, { "epoch": 0.8306628577988335, "grad_norm": 1.8240306377410889, "learning_rate": 7.333136239944583e-07, "loss": 0.775, "step": 23517 }, { "epoch": 0.8306981796025414, "grad_norm": 1.5935065746307373, "learning_rate": 7.330154292598441e-07, "loss": 0.7384, "step": 23518 }, { "epoch": 0.8307335014062494, "grad_norm": 1.9078669548034668, "learning_rate": 7.327172903707231e-07, "loss": 0.7856, "step": 23519 }, { "epoch": 0.8307688232099573, "grad_norm": 0.9881807565689087, "learning_rate": 7.324192073309949e-07, "loss": 0.5847, "step": 23520 }, { "epoch": 0.8308041450136652, "grad_norm": 2.7141003608703613, "learning_rate": 7.321211801445627e-07, "loss": 0.7754, "step": 23521 }, { "epoch": 0.8308394668173731, "grad_norm": 1.6957814693450928, "learning_rate": 7.318232088153255e-07, "loss": 0.7757, "step": 23522 }, { "epoch": 0.8308747886210809, "grad_norm": 1.6734012365341187, "learning_rate": 7.315252933471834e-07, "loss": 0.792, "step": 23523 }, { "epoch": 0.8309101104247888, "grad_norm": 1.678680419921875, "learning_rate": 7.312274337440373e-07, "loss": 0.7516, "step": 23524 }, { "epoch": 0.8309454322284967, "grad_norm": 1.7978938817977905, "learning_rate": 7.309296300097829e-07, "loss": 0.7695, "step": 23525 }, { "epoch": 0.8309807540322046, "grad_norm": 1.6853491067886353, "learning_rate": 7.306318821483188e-07, "loss": 0.7708, "step": 23526 }, { "epoch": 0.8310160758359125, "grad_norm": 1.64900541305542, "learning_rate": 7.303341901635436e-07, "loss": 0.7702, "step": 23527 }, { "epoch": 0.8310513976396204, "grad_norm": 2.164987564086914, "learning_rate": 7.300365540593507e-07, "loss": 0.7622, "step": 23528 }, { "epoch": 0.8310867194433283, "grad_norm": 1.685326099395752, "learning_rate": 7.297389738396366e-07, "loss": 0.7517, "step": 23529 }, { "epoch": 0.8311220412470363, "grad_norm": 1.5337334871292114, "learning_rate": 7.294414495082974e-07, "loss": 0.7771, "step": 23530 }, { "epoch": 0.8311573630507442, "grad_norm": 1.620462417602539, "learning_rate": 7.291439810692241e-07, "loss": 0.7493, "step": 23531 }, { "epoch": 0.8311926848544521, "grad_norm": 1.6962639093399048, "learning_rate": 7.288465685263118e-07, "loss": 0.7659, "step": 23532 }, { "epoch": 0.83122800665816, "grad_norm": 1.7399168014526367, "learning_rate": 7.285492118834542e-07, "loss": 0.7288, "step": 23533 }, { "epoch": 0.8312633284618679, "grad_norm": 1.7169781923294067, "learning_rate": 7.282519111445402e-07, "loss": 0.7382, "step": 23534 }, { "epoch": 0.8312986502655758, "grad_norm": 1.777153491973877, "learning_rate": 7.279546663134617e-07, "loss": 0.7757, "step": 23535 }, { "epoch": 0.8313339720692837, "grad_norm": 4.070305824279785, "learning_rate": 7.276574773941115e-07, "loss": 0.7858, "step": 23536 }, { "epoch": 0.8313692938729916, "grad_norm": 1.7062499523162842, "learning_rate": 7.273603443903754e-07, "loss": 0.7632, "step": 23537 }, { "epoch": 0.8314046156766995, "grad_norm": 1.681899905204773, "learning_rate": 7.270632673061451e-07, "loss": 0.7616, "step": 23538 }, { "epoch": 0.8314399374804075, "grad_norm": 2.2328596115112305, "learning_rate": 7.267662461453067e-07, "loss": 0.7923, "step": 23539 }, { "epoch": 0.8314752592841154, "grad_norm": 1.69382643699646, "learning_rate": 7.26469280911749e-07, "loss": 0.7425, "step": 23540 }, { "epoch": 0.8315105810878233, "grad_norm": 1.6774746179580688, "learning_rate": 7.261723716093577e-07, "loss": 0.7879, "step": 23541 }, { "epoch": 0.8315459028915312, "grad_norm": 1.7783844470977783, "learning_rate": 7.258755182420185e-07, "loss": 0.7479, "step": 23542 }, { "epoch": 0.8315812246952391, "grad_norm": 1.707757592201233, "learning_rate": 7.255787208136183e-07, "loss": 0.7556, "step": 23543 }, { "epoch": 0.831616546498947, "grad_norm": 1.6834042072296143, "learning_rate": 7.252819793280392e-07, "loss": 0.7527, "step": 23544 }, { "epoch": 0.8316518683026549, "grad_norm": 1.6967439651489258, "learning_rate": 7.249852937891655e-07, "loss": 0.7576, "step": 23545 }, { "epoch": 0.8316871901063628, "grad_norm": 1.6996678113937378, "learning_rate": 7.246886642008821e-07, "loss": 0.7842, "step": 23546 }, { "epoch": 0.8317225119100707, "grad_norm": 1.6608926057815552, "learning_rate": 7.243920905670688e-07, "loss": 0.7707, "step": 23547 }, { "epoch": 0.8317578337137786, "grad_norm": 1.586406946182251, "learning_rate": 7.240955728916083e-07, "loss": 0.7725, "step": 23548 }, { "epoch": 0.8317931555174864, "grad_norm": 1.7742737531661987, "learning_rate": 7.23799111178381e-07, "loss": 0.809, "step": 23549 }, { "epoch": 0.8318284773211944, "grad_norm": 1.8257685899734497, "learning_rate": 7.235027054312677e-07, "loss": 0.7539, "step": 23550 }, { "epoch": 0.8318637991249023, "grad_norm": 1.9574477672576904, "learning_rate": 7.232063556541463e-07, "loss": 0.8031, "step": 23551 }, { "epoch": 0.8318991209286102, "grad_norm": 1.6277610063552856, "learning_rate": 7.229100618508967e-07, "loss": 0.7776, "step": 23552 }, { "epoch": 0.8319344427323181, "grad_norm": 1.7477918863296509, "learning_rate": 7.226138240253966e-07, "loss": 0.7725, "step": 23553 }, { "epoch": 0.831969764536026, "grad_norm": 1.9675798416137695, "learning_rate": 7.22317642181522e-07, "loss": 0.7891, "step": 23554 }, { "epoch": 0.8320050863397339, "grad_norm": 1.7005704641342163, "learning_rate": 7.220215163231498e-07, "loss": 0.7521, "step": 23555 }, { "epoch": 0.8320404081434418, "grad_norm": 1.7221488952636719, "learning_rate": 7.217254464541584e-07, "loss": 0.7431, "step": 23556 }, { "epoch": 0.8320757299471497, "grad_norm": 1.6292905807495117, "learning_rate": 7.214294325784176e-07, "loss": 0.7751, "step": 23557 }, { "epoch": 0.8321110517508576, "grad_norm": 1.7210966348648071, "learning_rate": 7.211334746998039e-07, "loss": 0.7536, "step": 23558 }, { "epoch": 0.8321463735545656, "grad_norm": 0.9584736227989197, "learning_rate": 7.208375728221911e-07, "loss": 0.6004, "step": 23559 }, { "epoch": 0.8321816953582735, "grad_norm": 1.6956539154052734, "learning_rate": 7.20541726949453e-07, "loss": 0.7569, "step": 23560 }, { "epoch": 0.8322170171619814, "grad_norm": 1.769106388092041, "learning_rate": 7.202459370854592e-07, "loss": 0.7644, "step": 23561 }, { "epoch": 0.8322523389656893, "grad_norm": 1.8249413967132568, "learning_rate": 7.199502032340822e-07, "loss": 0.8003, "step": 23562 }, { "epoch": 0.8322876607693972, "grad_norm": 4.248112678527832, "learning_rate": 7.196545253991933e-07, "loss": 0.7523, "step": 23563 }, { "epoch": 0.8323229825731051, "grad_norm": 1.5557514429092407, "learning_rate": 7.193589035846599e-07, "loss": 0.7709, "step": 23564 }, { "epoch": 0.832358304376813, "grad_norm": 1.619741678237915, "learning_rate": 7.190633377943529e-07, "loss": 0.8068, "step": 23565 }, { "epoch": 0.8323936261805209, "grad_norm": 1.7354458570480347, "learning_rate": 7.187678280321414e-07, "loss": 0.7809, "step": 23566 }, { "epoch": 0.8324289479842288, "grad_norm": 1.517153263092041, "learning_rate": 7.184723743018906e-07, "loss": 0.7411, "step": 23567 }, { "epoch": 0.8324642697879368, "grad_norm": 1.63835871219635, "learning_rate": 7.181769766074686e-07, "loss": 0.768, "step": 23568 }, { "epoch": 0.8324995915916447, "grad_norm": 1.575565218925476, "learning_rate": 7.178816349527424e-07, "loss": 0.7759, "step": 23569 }, { "epoch": 0.8325349133953526, "grad_norm": 1.768640160560608, "learning_rate": 7.175863493415752e-07, "loss": 0.7898, "step": 23570 }, { "epoch": 0.8325702351990605, "grad_norm": 1.7370718717575073, "learning_rate": 7.172911197778327e-07, "loss": 0.7736, "step": 23571 }, { "epoch": 0.8326055570027684, "grad_norm": 1.718682050704956, "learning_rate": 7.169959462653803e-07, "loss": 0.7598, "step": 23572 }, { "epoch": 0.8326408788064763, "grad_norm": 1.529314637184143, "learning_rate": 7.167008288080791e-07, "loss": 0.7529, "step": 23573 }, { "epoch": 0.8326762006101842, "grad_norm": 1.8164726495742798, "learning_rate": 7.164057674097919e-07, "loss": 0.7548, "step": 23574 }, { "epoch": 0.832711522413892, "grad_norm": 1.6507959365844727, "learning_rate": 7.161107620743818e-07, "loss": 0.758, "step": 23575 }, { "epoch": 0.8327468442175999, "grad_norm": 1.6331403255462646, "learning_rate": 7.158158128057091e-07, "loss": 0.7646, "step": 23576 }, { "epoch": 0.8327821660213078, "grad_norm": 1.56436026096344, "learning_rate": 7.155209196076324e-07, "loss": 0.7481, "step": 23577 }, { "epoch": 0.8328174878250157, "grad_norm": 1.9172247648239136, "learning_rate": 7.152260824840124e-07, "loss": 0.7216, "step": 23578 }, { "epoch": 0.8328528096287237, "grad_norm": 1.5798461437225342, "learning_rate": 7.149313014387094e-07, "loss": 0.7525, "step": 23579 }, { "epoch": 0.8328881314324316, "grad_norm": 2.114193916320801, "learning_rate": 7.146365764755786e-07, "loss": 0.7501, "step": 23580 }, { "epoch": 0.8329234532361395, "grad_norm": 1.6780009269714355, "learning_rate": 7.143419075984786e-07, "loss": 0.7496, "step": 23581 }, { "epoch": 0.8329587750398474, "grad_norm": 1.7172212600708008, "learning_rate": 7.140472948112675e-07, "loss": 0.7703, "step": 23582 }, { "epoch": 0.8329940968435553, "grad_norm": 1.8603992462158203, "learning_rate": 7.137527381177989e-07, "loss": 0.7707, "step": 23583 }, { "epoch": 0.8330294186472632, "grad_norm": 1.6804441213607788, "learning_rate": 7.134582375219284e-07, "loss": 0.7439, "step": 23584 }, { "epoch": 0.8330647404509711, "grad_norm": 2.0008363723754883, "learning_rate": 7.131637930275115e-07, "loss": 0.7822, "step": 23585 }, { "epoch": 0.833100062254679, "grad_norm": 1.6307432651519775, "learning_rate": 7.128694046384005e-07, "loss": 0.7531, "step": 23586 }, { "epoch": 0.8331353840583869, "grad_norm": 1.944488286972046, "learning_rate": 7.125750723584485e-07, "loss": 0.7637, "step": 23587 }, { "epoch": 0.8331707058620949, "grad_norm": 1.5536178350448608, "learning_rate": 7.12280796191509e-07, "loss": 0.7528, "step": 23588 }, { "epoch": 0.8332060276658028, "grad_norm": 1.711456060409546, "learning_rate": 7.119865761414319e-07, "loss": 0.7452, "step": 23589 }, { "epoch": 0.8332413494695107, "grad_norm": 1.7109935283660889, "learning_rate": 7.116924122120677e-07, "loss": 0.7843, "step": 23590 }, { "epoch": 0.8332766712732186, "grad_norm": 1.6577366590499878, "learning_rate": 7.113983044072687e-07, "loss": 0.7487, "step": 23591 }, { "epoch": 0.8333119930769265, "grad_norm": 1.7342209815979004, "learning_rate": 7.111042527308815e-07, "loss": 0.7441, "step": 23592 }, { "epoch": 0.8333473148806344, "grad_norm": 1.755387306213379, "learning_rate": 7.108102571867564e-07, "loss": 0.7781, "step": 23593 }, { "epoch": 0.8333826366843423, "grad_norm": 3.0433762073516846, "learning_rate": 7.105163177787395e-07, "loss": 0.7643, "step": 23594 }, { "epoch": 0.8334179584880502, "grad_norm": 1.6622165441513062, "learning_rate": 7.102224345106795e-07, "loss": 0.7854, "step": 23595 }, { "epoch": 0.8334532802917581, "grad_norm": 1.5383503437042236, "learning_rate": 7.099286073864209e-07, "loss": 0.7585, "step": 23596 }, { "epoch": 0.833488602095466, "grad_norm": 1.6194579601287842, "learning_rate": 7.096348364098104e-07, "loss": 0.7305, "step": 23597 }, { "epoch": 0.833523923899174, "grad_norm": 1.8588204383850098, "learning_rate": 7.09341121584694e-07, "loss": 0.7618, "step": 23598 }, { "epoch": 0.8335592457028819, "grad_norm": 0.9369234442710876, "learning_rate": 7.09047462914913e-07, "loss": 0.5575, "step": 23599 }, { "epoch": 0.8335945675065898, "grad_norm": 1.9526859521865845, "learning_rate": 7.08753860404312e-07, "loss": 0.7997, "step": 23600 }, { "epoch": 0.8336298893102976, "grad_norm": 1.7744554281234741, "learning_rate": 7.084603140567353e-07, "loss": 0.777, "step": 23601 }, { "epoch": 0.8336652111140055, "grad_norm": 1.5416074991226196, "learning_rate": 7.08166823876022e-07, "loss": 0.7284, "step": 23602 }, { "epoch": 0.8337005329177134, "grad_norm": 1.6905087232589722, "learning_rate": 7.078733898660145e-07, "loss": 0.7851, "step": 23603 }, { "epoch": 0.8337358547214213, "grad_norm": 1.82318913936615, "learning_rate": 7.075800120305543e-07, "loss": 0.7515, "step": 23604 }, { "epoch": 0.8337711765251292, "grad_norm": 1.9511758089065552, "learning_rate": 7.072866903734787e-07, "loss": 0.7518, "step": 23605 }, { "epoch": 0.8338064983288371, "grad_norm": 1.6583120822906494, "learning_rate": 7.069934248986282e-07, "loss": 0.8007, "step": 23606 }, { "epoch": 0.833841820132545, "grad_norm": 1.832593560218811, "learning_rate": 7.067002156098413e-07, "loss": 0.776, "step": 23607 }, { "epoch": 0.833877141936253, "grad_norm": 1.6571820974349976, "learning_rate": 7.064070625109554e-07, "loss": 0.7681, "step": 23608 }, { "epoch": 0.8339124637399609, "grad_norm": 1.873083233833313, "learning_rate": 7.061139656058058e-07, "loss": 0.7469, "step": 23609 }, { "epoch": 0.8339477855436688, "grad_norm": 1.7480647563934326, "learning_rate": 7.058209248982295e-07, "loss": 0.7694, "step": 23610 }, { "epoch": 0.8339831073473767, "grad_norm": 1.788690209388733, "learning_rate": 7.055279403920628e-07, "loss": 0.7726, "step": 23611 }, { "epoch": 0.8340184291510846, "grad_norm": 1.7394554615020752, "learning_rate": 7.052350120911388e-07, "loss": 0.7745, "step": 23612 }, { "epoch": 0.8340537509547925, "grad_norm": 1.8132303953170776, "learning_rate": 7.049421399992911e-07, "loss": 0.719, "step": 23613 }, { "epoch": 0.8340890727585004, "grad_norm": 1.7114177942276, "learning_rate": 7.046493241203539e-07, "loss": 0.7434, "step": 23614 }, { "epoch": 0.8341243945622083, "grad_norm": 1.5470457077026367, "learning_rate": 7.043565644581585e-07, "loss": 0.7677, "step": 23615 }, { "epoch": 0.8341597163659162, "grad_norm": 1.7372559309005737, "learning_rate": 7.040638610165362e-07, "loss": 0.7925, "step": 23616 }, { "epoch": 0.8341950381696241, "grad_norm": 1.6446068286895752, "learning_rate": 7.037712137993185e-07, "loss": 0.7874, "step": 23617 }, { "epoch": 0.8342303599733321, "grad_norm": 1.6101124286651611, "learning_rate": 7.034786228103374e-07, "loss": 0.7169, "step": 23618 }, { "epoch": 0.83426568177704, "grad_norm": 1.6880892515182495, "learning_rate": 7.031860880534186e-07, "loss": 0.8015, "step": 23619 }, { "epoch": 0.8343010035807479, "grad_norm": 1.8077441453933716, "learning_rate": 7.028936095323929e-07, "loss": 0.76, "step": 23620 }, { "epoch": 0.8343363253844558, "grad_norm": 1.7068285942077637, "learning_rate": 7.026011872510891e-07, "loss": 0.7708, "step": 23621 }, { "epoch": 0.8343716471881637, "grad_norm": 1.6913491487503052, "learning_rate": 7.023088212133317e-07, "loss": 0.78, "step": 23622 }, { "epoch": 0.8344069689918716, "grad_norm": 1.7805628776550293, "learning_rate": 7.020165114229488e-07, "loss": 0.777, "step": 23623 }, { "epoch": 0.8344422907955795, "grad_norm": 1.624520182609558, "learning_rate": 7.017242578837669e-07, "loss": 0.7641, "step": 23624 }, { "epoch": 0.8344776125992874, "grad_norm": 1.5719119310379028, "learning_rate": 7.014320605996083e-07, "loss": 0.716, "step": 23625 }, { "epoch": 0.8345129344029953, "grad_norm": 1.7605156898498535, "learning_rate": 7.011399195742996e-07, "loss": 0.8082, "step": 23626 }, { "epoch": 0.8345482562067031, "grad_norm": 1.760899305343628, "learning_rate": 7.008478348116642e-07, "loss": 0.7952, "step": 23627 }, { "epoch": 0.834583578010411, "grad_norm": 1.6793402433395386, "learning_rate": 7.005558063155233e-07, "loss": 0.7893, "step": 23628 }, { "epoch": 0.834618899814119, "grad_norm": 1.657496690750122, "learning_rate": 7.002638340897e-07, "loss": 0.7534, "step": 23629 }, { "epoch": 0.8346542216178269, "grad_norm": 1.6002144813537598, "learning_rate": 6.99971918138016e-07, "loss": 0.7791, "step": 23630 }, { "epoch": 0.8346895434215348, "grad_norm": 2.995128870010376, "learning_rate": 6.996800584642915e-07, "loss": 0.755, "step": 23631 }, { "epoch": 0.8347248652252427, "grad_norm": 1.5277099609375, "learning_rate": 6.993882550723452e-07, "loss": 0.7433, "step": 23632 }, { "epoch": 0.8347601870289506, "grad_norm": 2.1329457759857178, "learning_rate": 6.990965079659961e-07, "loss": 0.791, "step": 23633 }, { "epoch": 0.8347955088326585, "grad_norm": 2.369158983230591, "learning_rate": 6.988048171490652e-07, "loss": 0.7662, "step": 23634 }, { "epoch": 0.8348308306363664, "grad_norm": 1.6966086626052856, "learning_rate": 6.985131826253672e-07, "loss": 0.7773, "step": 23635 }, { "epoch": 0.8348661524400743, "grad_norm": 1.8946558237075806, "learning_rate": 6.9822160439872e-07, "loss": 0.7515, "step": 23636 }, { "epoch": 0.8349014742437822, "grad_norm": 1.506367564201355, "learning_rate": 6.979300824729413e-07, "loss": 0.7451, "step": 23637 }, { "epoch": 0.8349367960474902, "grad_norm": 1.5587782859802246, "learning_rate": 6.976386168518435e-07, "loss": 0.7281, "step": 23638 }, { "epoch": 0.8349721178511981, "grad_norm": 1.5273399353027344, "learning_rate": 6.973472075392424e-07, "loss": 0.7409, "step": 23639 }, { "epoch": 0.835007439654906, "grad_norm": 1.6886672973632812, "learning_rate": 6.970558545389538e-07, "loss": 0.739, "step": 23640 }, { "epoch": 0.8350427614586139, "grad_norm": 1.7143961191177368, "learning_rate": 6.967645578547877e-07, "loss": 0.7423, "step": 23641 }, { "epoch": 0.8350780832623218, "grad_norm": 1.7012686729431152, "learning_rate": 6.964733174905586e-07, "loss": 0.7957, "step": 23642 }, { "epoch": 0.8351134050660297, "grad_norm": 1.6740115880966187, "learning_rate": 6.961821334500785e-07, "loss": 0.7256, "step": 23643 }, { "epoch": 0.8351487268697376, "grad_norm": 2.1588897705078125, "learning_rate": 6.958910057371571e-07, "loss": 0.7429, "step": 23644 }, { "epoch": 0.8351840486734455, "grad_norm": 1.5070956945419312, "learning_rate": 6.955999343556047e-07, "loss": 0.7392, "step": 23645 }, { "epoch": 0.8352193704771534, "grad_norm": 1.6694821119308472, "learning_rate": 6.953089193092321e-07, "loss": 0.8065, "step": 23646 }, { "epoch": 0.8352546922808614, "grad_norm": 1.7507238388061523, "learning_rate": 6.950179606018465e-07, "loss": 0.7356, "step": 23647 }, { "epoch": 0.8352900140845693, "grad_norm": 1.7232156991958618, "learning_rate": 6.947270582372578e-07, "loss": 0.7756, "step": 23648 }, { "epoch": 0.8353253358882772, "grad_norm": 1.8421025276184082, "learning_rate": 6.944362122192705e-07, "loss": 0.8021, "step": 23649 }, { "epoch": 0.8353606576919851, "grad_norm": 1.7811263799667358, "learning_rate": 6.941454225516941e-07, "loss": 0.7847, "step": 23650 }, { "epoch": 0.835395979495693, "grad_norm": 1.685104250907898, "learning_rate": 6.938546892383319e-07, "loss": 0.7677, "step": 23651 }, { "epoch": 0.8354313012994009, "grad_norm": 1.7787182331085205, "learning_rate": 6.935640122829895e-07, "loss": 0.7617, "step": 23652 }, { "epoch": 0.8354666231031087, "grad_norm": 1.5857127904891968, "learning_rate": 6.932733916894735e-07, "loss": 0.7396, "step": 23653 }, { "epoch": 0.8355019449068166, "grad_norm": 1.7856026887893677, "learning_rate": 6.929828274615846e-07, "loss": 0.7909, "step": 23654 }, { "epoch": 0.8355372667105245, "grad_norm": 1.6626816987991333, "learning_rate": 6.926923196031266e-07, "loss": 0.7672, "step": 23655 }, { "epoch": 0.8355725885142324, "grad_norm": 1.859198808670044, "learning_rate": 6.924018681179034e-07, "loss": 0.7106, "step": 23656 }, { "epoch": 0.8356079103179403, "grad_norm": 1.6280884742736816, "learning_rate": 6.921114730097134e-07, "loss": 0.7909, "step": 23657 }, { "epoch": 0.8356432321216483, "grad_norm": 1.6271145343780518, "learning_rate": 6.918211342823583e-07, "loss": 0.7274, "step": 23658 }, { "epoch": 0.8356785539253562, "grad_norm": 1.610716462135315, "learning_rate": 6.915308519396402e-07, "loss": 0.7574, "step": 23659 }, { "epoch": 0.8357138757290641, "grad_norm": 1.7024940252304077, "learning_rate": 6.912406259853549e-07, "loss": 0.7617, "step": 23660 }, { "epoch": 0.835749197532772, "grad_norm": 1.577386498451233, "learning_rate": 6.909504564233022e-07, "loss": 0.7393, "step": 23661 }, { "epoch": 0.8357845193364799, "grad_norm": 1.7193399667739868, "learning_rate": 6.906603432572811e-07, "loss": 0.7495, "step": 23662 }, { "epoch": 0.8358198411401878, "grad_norm": 1.8704164028167725, "learning_rate": 6.903702864910861e-07, "loss": 0.7587, "step": 23663 }, { "epoch": 0.8358551629438957, "grad_norm": 1.749224305152893, "learning_rate": 6.900802861285149e-07, "loss": 0.7683, "step": 23664 }, { "epoch": 0.8358904847476036, "grad_norm": 1.7080786228179932, "learning_rate": 6.897903421733626e-07, "loss": 0.7575, "step": 23665 }, { "epoch": 0.8359258065513115, "grad_norm": 1.771415114402771, "learning_rate": 6.895004546294254e-07, "loss": 0.759, "step": 23666 }, { "epoch": 0.8359611283550195, "grad_norm": 1.605126142501831, "learning_rate": 6.892106235004959e-07, "loss": 0.746, "step": 23667 }, { "epoch": 0.8359964501587274, "grad_norm": 1.8842065334320068, "learning_rate": 6.889208487903665e-07, "loss": 0.7722, "step": 23668 }, { "epoch": 0.8360317719624353, "grad_norm": 1.7703403234481812, "learning_rate": 6.886311305028315e-07, "loss": 0.7665, "step": 23669 }, { "epoch": 0.8360670937661432, "grad_norm": 1.6927834749221802, "learning_rate": 6.883414686416806e-07, "loss": 0.7696, "step": 23670 }, { "epoch": 0.8361024155698511, "grad_norm": 1.6706396341323853, "learning_rate": 6.880518632107058e-07, "loss": 0.7335, "step": 23671 }, { "epoch": 0.836137737373559, "grad_norm": 1.6454665660858154, "learning_rate": 6.877623142136991e-07, "loss": 0.7595, "step": 23672 }, { "epoch": 0.8361730591772669, "grad_norm": 1.7150300741195679, "learning_rate": 6.874728216544479e-07, "loss": 0.7715, "step": 23673 }, { "epoch": 0.8362083809809748, "grad_norm": 1.6645019054412842, "learning_rate": 6.87183385536741e-07, "loss": 0.7704, "step": 23674 }, { "epoch": 0.8362437027846827, "grad_norm": 1.0485124588012695, "learning_rate": 6.86894005864368e-07, "loss": 0.5649, "step": 23675 }, { "epoch": 0.8362790245883907, "grad_norm": 2.9894354343414307, "learning_rate": 6.86604682641116e-07, "loss": 0.7666, "step": 23676 }, { "epoch": 0.8363143463920986, "grad_norm": 1.564698338508606, "learning_rate": 6.863154158707702e-07, "loss": 0.75, "step": 23677 }, { "epoch": 0.8363496681958065, "grad_norm": 1.6748510599136353, "learning_rate": 6.86026205557117e-07, "loss": 0.7995, "step": 23678 }, { "epoch": 0.8363849899995143, "grad_norm": 1.7584187984466553, "learning_rate": 6.857370517039436e-07, "loss": 0.7734, "step": 23679 }, { "epoch": 0.8364203118032222, "grad_norm": 1.7457817792892456, "learning_rate": 6.854479543150311e-07, "loss": 0.7864, "step": 23680 }, { "epoch": 0.8364556336069301, "grad_norm": 1.726093053817749, "learning_rate": 6.851589133941649e-07, "loss": 0.7558, "step": 23681 }, { "epoch": 0.836490955410638, "grad_norm": 1.7965492010116577, "learning_rate": 6.848699289451289e-07, "loss": 0.8121, "step": 23682 }, { "epoch": 0.8365262772143459, "grad_norm": 1.623215913772583, "learning_rate": 6.845810009717029e-07, "loss": 0.7511, "step": 23683 }, { "epoch": 0.8365615990180538, "grad_norm": 1.7374992370605469, "learning_rate": 6.8429212947767e-07, "loss": 0.7396, "step": 23684 }, { "epoch": 0.8365969208217617, "grad_norm": 2.0727055072784424, "learning_rate": 6.840033144668113e-07, "loss": 0.7857, "step": 23685 }, { "epoch": 0.8366322426254696, "grad_norm": 1.8110802173614502, "learning_rate": 6.837145559429054e-07, "loss": 0.7813, "step": 23686 }, { "epoch": 0.8366675644291776, "grad_norm": 1.4902781248092651, "learning_rate": 6.83425853909731e-07, "loss": 0.7229, "step": 23687 }, { "epoch": 0.8367028862328855, "grad_norm": 1.5124495029449463, "learning_rate": 6.83137208371068e-07, "loss": 0.7155, "step": 23688 }, { "epoch": 0.8367382080365934, "grad_norm": 1.738304853439331, "learning_rate": 6.828486193306949e-07, "loss": 0.7884, "step": 23689 }, { "epoch": 0.8367735298403013, "grad_norm": 1.6952197551727295, "learning_rate": 6.825600867923865e-07, "loss": 0.7687, "step": 23690 }, { "epoch": 0.8368088516440092, "grad_norm": 1.9490543603897095, "learning_rate": 6.822716107599198e-07, "loss": 0.8034, "step": 23691 }, { "epoch": 0.8368441734477171, "grad_norm": 1.5952672958374023, "learning_rate": 6.819831912370717e-07, "loss": 0.7735, "step": 23692 }, { "epoch": 0.836879495251425, "grad_norm": 2.315913438796997, "learning_rate": 6.816948282276147e-07, "loss": 0.7458, "step": 23693 }, { "epoch": 0.8369148170551329, "grad_norm": 2.025463819503784, "learning_rate": 6.814065217353244e-07, "loss": 0.7571, "step": 23694 }, { "epoch": 0.8369501388588408, "grad_norm": 2.0069096088409424, "learning_rate": 6.811182717639742e-07, "loss": 0.8029, "step": 23695 }, { "epoch": 0.8369854606625488, "grad_norm": 1.5836608409881592, "learning_rate": 6.808300783173354e-07, "loss": 0.7802, "step": 23696 }, { "epoch": 0.8370207824662567, "grad_norm": 1.6518974304199219, "learning_rate": 6.80541941399181e-07, "loss": 0.7703, "step": 23697 }, { "epoch": 0.8370561042699646, "grad_norm": 1.6914494037628174, "learning_rate": 6.802538610132825e-07, "loss": 0.8027, "step": 23698 }, { "epoch": 0.8370914260736725, "grad_norm": 1.7166011333465576, "learning_rate": 6.799658371634082e-07, "loss": 0.7795, "step": 23699 }, { "epoch": 0.8371267478773804, "grad_norm": 1.7124080657958984, "learning_rate": 6.796778698533296e-07, "loss": 0.7478, "step": 23700 }, { "epoch": 0.8371620696810883, "grad_norm": 1.7274911403656006, "learning_rate": 6.793899590868153e-07, "loss": 0.7917, "step": 23701 }, { "epoch": 0.8371973914847962, "grad_norm": 1.6567206382751465, "learning_rate": 6.791021048676321e-07, "loss": 0.7561, "step": 23702 }, { "epoch": 0.8372327132885041, "grad_norm": 1.6996601819992065, "learning_rate": 6.788143071995485e-07, "loss": 0.7903, "step": 23703 }, { "epoch": 0.837268035092212, "grad_norm": 2.0046300888061523, "learning_rate": 6.785265660863316e-07, "loss": 0.796, "step": 23704 }, { "epoch": 0.8373033568959198, "grad_norm": 1.7463836669921875, "learning_rate": 6.78238881531747e-07, "loss": 0.7765, "step": 23705 }, { "epoch": 0.8373386786996277, "grad_norm": 1.6456193923950195, "learning_rate": 6.779512535395588e-07, "loss": 0.7706, "step": 23706 }, { "epoch": 0.8373740005033357, "grad_norm": 1.6313753128051758, "learning_rate": 6.776636821135318e-07, "loss": 0.7558, "step": 23707 }, { "epoch": 0.8374093223070436, "grad_norm": 1.6432799100875854, "learning_rate": 6.773761672574308e-07, "loss": 0.7531, "step": 23708 }, { "epoch": 0.8374446441107515, "grad_norm": 1.6919194459915161, "learning_rate": 6.770887089750173e-07, "loss": 0.7598, "step": 23709 }, { "epoch": 0.8374799659144594, "grad_norm": 0.9457328915596008, "learning_rate": 6.768013072700536e-07, "loss": 0.5607, "step": 23710 }, { "epoch": 0.8375152877181673, "grad_norm": 1.7599238157272339, "learning_rate": 6.765139621463035e-07, "loss": 0.7852, "step": 23711 }, { "epoch": 0.8375506095218752, "grad_norm": 1.7610893249511719, "learning_rate": 6.762266736075246e-07, "loss": 0.7641, "step": 23712 }, { "epoch": 0.8375859313255831, "grad_norm": 1.728389024734497, "learning_rate": 6.759394416574778e-07, "loss": 0.7678, "step": 23713 }, { "epoch": 0.837621253129291, "grad_norm": 1.858464002609253, "learning_rate": 6.756522662999243e-07, "loss": 0.738, "step": 23714 }, { "epoch": 0.8376565749329989, "grad_norm": 2.2882018089294434, "learning_rate": 6.753651475386197e-07, "loss": 0.7169, "step": 23715 }, { "epoch": 0.8376918967367069, "grad_norm": 1.8303643465042114, "learning_rate": 6.750780853773237e-07, "loss": 0.7501, "step": 23716 }, { "epoch": 0.8377272185404148, "grad_norm": 1.756197214126587, "learning_rate": 6.747910798197932e-07, "loss": 0.7663, "step": 23717 }, { "epoch": 0.8377625403441227, "grad_norm": 1.7043931484222412, "learning_rate": 6.745041308697831e-07, "loss": 0.7694, "step": 23718 }, { "epoch": 0.8377978621478306, "grad_norm": 1.9306573867797852, "learning_rate": 6.742172385310497e-07, "loss": 0.8019, "step": 23719 }, { "epoch": 0.8378331839515385, "grad_norm": 0.9261112809181213, "learning_rate": 6.73930402807349e-07, "loss": 0.5555, "step": 23720 }, { "epoch": 0.8378685057552464, "grad_norm": 1.7301441431045532, "learning_rate": 6.736436237024329e-07, "loss": 0.752, "step": 23721 }, { "epoch": 0.8379038275589543, "grad_norm": 1.9147502183914185, "learning_rate": 6.733569012200575e-07, "loss": 0.7612, "step": 23722 }, { "epoch": 0.8379391493626622, "grad_norm": 1.6342768669128418, "learning_rate": 6.730702353639717e-07, "loss": 0.7908, "step": 23723 }, { "epoch": 0.8379744711663701, "grad_norm": 1.593902826309204, "learning_rate": 6.727836261379306e-07, "loss": 0.7668, "step": 23724 }, { "epoch": 0.838009792970078, "grad_norm": 1.638952374458313, "learning_rate": 6.724970735456832e-07, "loss": 0.764, "step": 23725 }, { "epoch": 0.838045114773786, "grad_norm": 2.0120112895965576, "learning_rate": 6.722105775909804e-07, "loss": 0.7444, "step": 23726 }, { "epoch": 0.8380804365774939, "grad_norm": 1.0027378797531128, "learning_rate": 6.719241382775732e-07, "loss": 0.5955, "step": 23727 }, { "epoch": 0.8381157583812018, "grad_norm": 1.6835627555847168, "learning_rate": 6.716377556092079e-07, "loss": 0.756, "step": 23728 }, { "epoch": 0.8381510801849097, "grad_norm": 1.7966787815093994, "learning_rate": 6.713514295896345e-07, "loss": 0.7347, "step": 23729 }, { "epoch": 0.8381864019886176, "grad_norm": 1.6611614227294922, "learning_rate": 6.71065160222601e-07, "loss": 0.7274, "step": 23730 }, { "epoch": 0.8382217237923254, "grad_norm": 1.6875289678573608, "learning_rate": 6.707789475118515e-07, "loss": 0.7897, "step": 23731 }, { "epoch": 0.8382570455960333, "grad_norm": 1.9168739318847656, "learning_rate": 6.704927914611331e-07, "loss": 0.7522, "step": 23732 }, { "epoch": 0.8382923673997412, "grad_norm": 3.4005556106567383, "learning_rate": 6.702066920741917e-07, "loss": 0.7693, "step": 23733 }, { "epoch": 0.8383276892034491, "grad_norm": 1.5515056848526, "learning_rate": 6.699206493547722e-07, "loss": 0.7448, "step": 23734 }, { "epoch": 0.838363011007157, "grad_norm": 1.7327237129211426, "learning_rate": 6.696346633066158e-07, "loss": 0.7507, "step": 23735 }, { "epoch": 0.838398332810865, "grad_norm": 4.06801176071167, "learning_rate": 6.69348733933467e-07, "loss": 0.768, "step": 23736 }, { "epoch": 0.8384336546145729, "grad_norm": 2.01576828956604, "learning_rate": 6.690628612390693e-07, "loss": 0.7616, "step": 23737 }, { "epoch": 0.8384689764182808, "grad_norm": 1.7520650625228882, "learning_rate": 6.68777045227162e-07, "loss": 0.7683, "step": 23738 }, { "epoch": 0.8385042982219887, "grad_norm": 1.6307451725006104, "learning_rate": 6.684912859014858e-07, "loss": 0.7863, "step": 23739 }, { "epoch": 0.8385396200256966, "grad_norm": 1.644313931465149, "learning_rate": 6.682055832657825e-07, "loss": 0.7489, "step": 23740 }, { "epoch": 0.8385749418294045, "grad_norm": 1.7776700258255005, "learning_rate": 6.679199373237904e-07, "loss": 0.7723, "step": 23741 }, { "epoch": 0.8386102636331124, "grad_norm": 1.767693281173706, "learning_rate": 6.676343480792474e-07, "loss": 0.7307, "step": 23742 }, { "epoch": 0.8386455854368203, "grad_norm": 1.8268433809280396, "learning_rate": 6.67348815535891e-07, "loss": 0.7392, "step": 23743 }, { "epoch": 0.8386809072405282, "grad_norm": 2.3254241943359375, "learning_rate": 6.670633396974602e-07, "loss": 0.8136, "step": 23744 }, { "epoch": 0.8387162290442362, "grad_norm": 1.5981870889663696, "learning_rate": 6.667779205676883e-07, "loss": 0.7935, "step": 23745 }, { "epoch": 0.8387515508479441, "grad_norm": 1.895799994468689, "learning_rate": 6.664925581503129e-07, "loss": 0.779, "step": 23746 }, { "epoch": 0.838786872651652, "grad_norm": 1.643942952156067, "learning_rate": 6.662072524490693e-07, "loss": 0.7337, "step": 23747 }, { "epoch": 0.8388221944553599, "grad_norm": 1.771003246307373, "learning_rate": 6.659220034676894e-07, "loss": 0.7476, "step": 23748 }, { "epoch": 0.8388575162590678, "grad_norm": 1.8264553546905518, "learning_rate": 6.656368112099076e-07, "loss": 0.8204, "step": 23749 }, { "epoch": 0.8388928380627757, "grad_norm": 2.1347544193267822, "learning_rate": 6.653516756794576e-07, "loss": 0.7372, "step": 23750 }, { "epoch": 0.8389281598664836, "grad_norm": 1.6976054906845093, "learning_rate": 6.650665968800684e-07, "loss": 0.7551, "step": 23751 }, { "epoch": 0.8389634816701915, "grad_norm": 1.761555552482605, "learning_rate": 6.647815748154735e-07, "loss": 0.7908, "step": 23752 }, { "epoch": 0.8389988034738994, "grad_norm": 1.709929347038269, "learning_rate": 6.644966094894034e-07, "loss": 0.7404, "step": 23753 }, { "epoch": 0.8390341252776073, "grad_norm": 1.655121922492981, "learning_rate": 6.642117009055854e-07, "loss": 0.7593, "step": 23754 }, { "epoch": 0.8390694470813153, "grad_norm": 1.9616388082504272, "learning_rate": 6.639268490677492e-07, "loss": 0.7389, "step": 23755 }, { "epoch": 0.8391047688850232, "grad_norm": 1.656822681427002, "learning_rate": 6.636420539796251e-07, "loss": 0.7725, "step": 23756 }, { "epoch": 0.839140090688731, "grad_norm": 3.7059779167175293, "learning_rate": 6.633573156449368e-07, "loss": 0.7665, "step": 23757 }, { "epoch": 0.8391754124924389, "grad_norm": 1.6469082832336426, "learning_rate": 6.630726340674138e-07, "loss": 0.7723, "step": 23758 }, { "epoch": 0.8392107342961468, "grad_norm": 1.633514165878296, "learning_rate": 6.62788009250781e-07, "loss": 0.785, "step": 23759 }, { "epoch": 0.8392460560998547, "grad_norm": 2.5190281867980957, "learning_rate": 6.625034411987641e-07, "loss": 0.7569, "step": 23760 }, { "epoch": 0.8392813779035626, "grad_norm": 3.391500949859619, "learning_rate": 6.622189299150855e-07, "loss": 0.74, "step": 23761 }, { "epoch": 0.8393166997072705, "grad_norm": 1.6300017833709717, "learning_rate": 6.619344754034701e-07, "loss": 0.761, "step": 23762 }, { "epoch": 0.8393520215109784, "grad_norm": 1.6133887767791748, "learning_rate": 6.616500776676415e-07, "loss": 0.7963, "step": 23763 }, { "epoch": 0.8393873433146863, "grad_norm": 1.8487048149108887, "learning_rate": 6.613657367113207e-07, "loss": 0.7761, "step": 23764 }, { "epoch": 0.8394226651183943, "grad_norm": 1.9153101444244385, "learning_rate": 6.610814525382292e-07, "loss": 0.76, "step": 23765 }, { "epoch": 0.8394579869221022, "grad_norm": 1.746268391609192, "learning_rate": 6.607972251520894e-07, "loss": 0.7705, "step": 23766 }, { "epoch": 0.8394933087258101, "grad_norm": 1.6454317569732666, "learning_rate": 6.605130545566185e-07, "loss": 0.783, "step": 23767 }, { "epoch": 0.839528630529518, "grad_norm": 1.4928488731384277, "learning_rate": 6.602289407555373e-07, "loss": 0.7533, "step": 23768 }, { "epoch": 0.8395639523332259, "grad_norm": 1.658379316329956, "learning_rate": 6.599448837525646e-07, "loss": 0.7696, "step": 23769 }, { "epoch": 0.8395992741369338, "grad_norm": 2.976818561553955, "learning_rate": 6.596608835514167e-07, "loss": 0.7448, "step": 23770 }, { "epoch": 0.8396345959406417, "grad_norm": 1.5710091590881348, "learning_rate": 6.593769401558108e-07, "loss": 0.7245, "step": 23771 }, { "epoch": 0.8396699177443496, "grad_norm": 1.6466758251190186, "learning_rate": 6.590930535694651e-07, "loss": 0.7596, "step": 23772 }, { "epoch": 0.8397052395480575, "grad_norm": 1.7606682777404785, "learning_rate": 6.588092237960924e-07, "loss": 0.7557, "step": 23773 }, { "epoch": 0.8397405613517654, "grad_norm": 2.0069687366485596, "learning_rate": 6.585254508394084e-07, "loss": 0.7398, "step": 23774 }, { "epoch": 0.8397758831554734, "grad_norm": 1.7620309591293335, "learning_rate": 6.582417347031284e-07, "loss": 0.7789, "step": 23775 }, { "epoch": 0.8398112049591813, "grad_norm": 1.8178004026412964, "learning_rate": 6.579580753909631e-07, "loss": 0.7829, "step": 23776 }, { "epoch": 0.8398465267628892, "grad_norm": 1.6192559003829956, "learning_rate": 6.576744729066265e-07, "loss": 0.7588, "step": 23777 }, { "epoch": 0.8398818485665971, "grad_norm": 1.5828980207443237, "learning_rate": 6.57390927253831e-07, "loss": 0.7528, "step": 23778 }, { "epoch": 0.839917170370305, "grad_norm": 2.052884340286255, "learning_rate": 6.571074384362869e-07, "loss": 0.7585, "step": 23779 }, { "epoch": 0.8399524921740129, "grad_norm": 1.8966286182403564, "learning_rate": 6.568240064577031e-07, "loss": 0.7986, "step": 23780 }, { "epoch": 0.8399878139777208, "grad_norm": 1.6523085832595825, "learning_rate": 6.565406313217903e-07, "loss": 0.7565, "step": 23781 }, { "epoch": 0.8400231357814287, "grad_norm": 1.6817551851272583, "learning_rate": 6.562573130322586e-07, "loss": 0.8113, "step": 23782 }, { "epoch": 0.8400584575851365, "grad_norm": 2.178978204727173, "learning_rate": 6.559740515928131e-07, "loss": 0.7635, "step": 23783 }, { "epoch": 0.8400937793888444, "grad_norm": 1.932384967803955, "learning_rate": 6.556908470071627e-07, "loss": 0.786, "step": 23784 }, { "epoch": 0.8401291011925524, "grad_norm": 1.8015766143798828, "learning_rate": 6.554076992790154e-07, "loss": 0.7788, "step": 23785 }, { "epoch": 0.8401644229962603, "grad_norm": 1.00542151927948, "learning_rate": 6.551246084120738e-07, "loss": 0.5886, "step": 23786 }, { "epoch": 0.8401997447999682, "grad_norm": 1.7430267333984375, "learning_rate": 6.54841574410045e-07, "loss": 0.761, "step": 23787 }, { "epoch": 0.8402350666036761, "grad_norm": 1.6449625492095947, "learning_rate": 6.545585972766339e-07, "loss": 0.7591, "step": 23788 }, { "epoch": 0.840270388407384, "grad_norm": 1.6969211101531982, "learning_rate": 6.542756770155418e-07, "loss": 0.7593, "step": 23789 }, { "epoch": 0.8403057102110919, "grad_norm": 1.9595615863800049, "learning_rate": 6.539928136304724e-07, "loss": 0.7303, "step": 23790 }, { "epoch": 0.8403410320147998, "grad_norm": 1.57500159740448, "learning_rate": 6.537100071251284e-07, "loss": 0.8054, "step": 23791 }, { "epoch": 0.8403763538185077, "grad_norm": 1.6475307941436768, "learning_rate": 6.53427257503212e-07, "loss": 0.7799, "step": 23792 }, { "epoch": 0.8404116756222156, "grad_norm": 1.6728299856185913, "learning_rate": 6.531445647684214e-07, "loss": 0.7767, "step": 23793 }, { "epoch": 0.8404469974259235, "grad_norm": 1.6270220279693604, "learning_rate": 6.528619289244576e-07, "loss": 0.7415, "step": 23794 }, { "epoch": 0.8404823192296315, "grad_norm": 1.974542498588562, "learning_rate": 6.525793499750211e-07, "loss": 0.7865, "step": 23795 }, { "epoch": 0.8405176410333394, "grad_norm": 1.8804121017456055, "learning_rate": 6.522968279238085e-07, "loss": 0.8147, "step": 23796 }, { "epoch": 0.8405529628370473, "grad_norm": 1.5542278289794922, "learning_rate": 6.520143627745168e-07, "loss": 0.7547, "step": 23797 }, { "epoch": 0.8405882846407552, "grad_norm": 1.6070780754089355, "learning_rate": 6.517319545308448e-07, "loss": 0.773, "step": 23798 }, { "epoch": 0.8406236064444631, "grad_norm": 1.6402145624160767, "learning_rate": 6.514496031964862e-07, "loss": 0.7573, "step": 23799 }, { "epoch": 0.840658928248171, "grad_norm": 1.8594194650650024, "learning_rate": 6.51167308775138e-07, "loss": 0.7725, "step": 23800 }, { "epoch": 0.8406942500518789, "grad_norm": 1.7225390672683716, "learning_rate": 6.508850712704945e-07, "loss": 0.7349, "step": 23801 }, { "epoch": 0.8407295718555868, "grad_norm": 1.5044188499450684, "learning_rate": 6.50602890686251e-07, "loss": 0.7397, "step": 23802 }, { "epoch": 0.8407648936592947, "grad_norm": 1.8721274137496948, "learning_rate": 6.50320767026098e-07, "loss": 0.7844, "step": 23803 }, { "epoch": 0.8408002154630027, "grad_norm": 1.6410316228866577, "learning_rate": 6.500387002937292e-07, "loss": 0.7711, "step": 23804 }, { "epoch": 0.8408355372667106, "grad_norm": 1.7263299226760864, "learning_rate": 6.497566904928377e-07, "loss": 0.8019, "step": 23805 }, { "epoch": 0.8408708590704185, "grad_norm": 1.7220642566680908, "learning_rate": 6.49474737627111e-07, "loss": 0.779, "step": 23806 }, { "epoch": 0.8409061808741264, "grad_norm": 1.887316107749939, "learning_rate": 6.491928417002413e-07, "loss": 0.7782, "step": 23807 }, { "epoch": 0.8409415026778343, "grad_norm": 1.7213208675384521, "learning_rate": 6.489110027159196e-07, "loss": 0.7977, "step": 23808 }, { "epoch": 0.8409768244815421, "grad_norm": 1.5655168294906616, "learning_rate": 6.486292206778311e-07, "loss": 0.7659, "step": 23809 }, { "epoch": 0.84101214628525, "grad_norm": 1.778363585472107, "learning_rate": 6.483474955896652e-07, "loss": 0.8015, "step": 23810 }, { "epoch": 0.8410474680889579, "grad_norm": 2.1507930755615234, "learning_rate": 6.48065827455111e-07, "loss": 0.7543, "step": 23811 }, { "epoch": 0.8410827898926658, "grad_norm": 1.637683391571045, "learning_rate": 6.477842162778514e-07, "loss": 0.7511, "step": 23812 }, { "epoch": 0.8411181116963737, "grad_norm": 1.588921308517456, "learning_rate": 6.475026620615743e-07, "loss": 0.7588, "step": 23813 }, { "epoch": 0.8411534335000816, "grad_norm": 1.6472582817077637, "learning_rate": 6.472211648099652e-07, "loss": 0.7613, "step": 23814 }, { "epoch": 0.8411887553037896, "grad_norm": 0.9026537537574768, "learning_rate": 6.469397245267073e-07, "loss": 0.5909, "step": 23815 }, { "epoch": 0.8412240771074975, "grad_norm": 1.8246146440505981, "learning_rate": 6.466583412154825e-07, "loss": 0.7676, "step": 23816 }, { "epoch": 0.8412593989112054, "grad_norm": 1.6532008647918701, "learning_rate": 6.463770148799752e-07, "loss": 0.753, "step": 23817 }, { "epoch": 0.8412947207149133, "grad_norm": 1.8563064336776733, "learning_rate": 6.460957455238687e-07, "loss": 0.7501, "step": 23818 }, { "epoch": 0.8413300425186212, "grad_norm": 1.5240575075149536, "learning_rate": 6.458145331508409e-07, "loss": 0.7627, "step": 23819 }, { "epoch": 0.8413653643223291, "grad_norm": 1.653373122215271, "learning_rate": 6.455333777645745e-07, "loss": 0.7757, "step": 23820 }, { "epoch": 0.841400686126037, "grad_norm": 1.8072012662887573, "learning_rate": 6.452522793687493e-07, "loss": 0.7313, "step": 23821 }, { "epoch": 0.8414360079297449, "grad_norm": 1.6719074249267578, "learning_rate": 6.44971237967043e-07, "loss": 0.7105, "step": 23822 }, { "epoch": 0.8414713297334528, "grad_norm": 1.8387069702148438, "learning_rate": 6.446902535631345e-07, "loss": 0.7859, "step": 23823 }, { "epoch": 0.8415066515371608, "grad_norm": 1.7138850688934326, "learning_rate": 6.44409326160702e-07, "loss": 0.7655, "step": 23824 }, { "epoch": 0.8415419733408687, "grad_norm": 1.7111599445343018, "learning_rate": 6.441284557634203e-07, "loss": 0.7487, "step": 23825 }, { "epoch": 0.8415772951445766, "grad_norm": 1.7053762674331665, "learning_rate": 6.438476423749668e-07, "loss": 0.788, "step": 23826 }, { "epoch": 0.8416126169482845, "grad_norm": 1.7026281356811523, "learning_rate": 6.43566885999018e-07, "loss": 0.7652, "step": 23827 }, { "epoch": 0.8416479387519924, "grad_norm": 1.7741951942443848, "learning_rate": 6.432861866392454e-07, "loss": 0.7233, "step": 23828 }, { "epoch": 0.8416832605557003, "grad_norm": 1.83214271068573, "learning_rate": 6.430055442993238e-07, "loss": 0.7854, "step": 23829 }, { "epoch": 0.8417185823594082, "grad_norm": 1.9942142963409424, "learning_rate": 6.427249589829282e-07, "loss": 0.7792, "step": 23830 }, { "epoch": 0.8417539041631161, "grad_norm": 1.727304220199585, "learning_rate": 6.424444306937284e-07, "loss": 0.7648, "step": 23831 }, { "epoch": 0.841789225966824, "grad_norm": 1.6263724565505981, "learning_rate": 6.421639594353962e-07, "loss": 0.7588, "step": 23832 }, { "epoch": 0.841824547770532, "grad_norm": 1.6579577922821045, "learning_rate": 6.41883545211604e-07, "loss": 0.7399, "step": 23833 }, { "epoch": 0.8418598695742399, "grad_norm": 1.7368148565292358, "learning_rate": 6.416031880260209e-07, "loss": 0.7852, "step": 23834 }, { "epoch": 0.8418951913779477, "grad_norm": 1.6592329740524292, "learning_rate": 6.413228878823153e-07, "loss": 0.7289, "step": 23835 }, { "epoch": 0.8419305131816556, "grad_norm": 1.6997528076171875, "learning_rate": 6.410426447841556e-07, "loss": 0.7912, "step": 23836 }, { "epoch": 0.8419658349853635, "grad_norm": 2.1203935146331787, "learning_rate": 6.407624587352124e-07, "loss": 0.7715, "step": 23837 }, { "epoch": 0.8420011567890714, "grad_norm": 1.7913600206375122, "learning_rate": 6.404823297391488e-07, "loss": 0.703, "step": 23838 }, { "epoch": 0.8420364785927793, "grad_norm": 1.7259432077407837, "learning_rate": 6.40202257799633e-07, "loss": 0.7792, "step": 23839 }, { "epoch": 0.8420718003964872, "grad_norm": 1.8109004497528076, "learning_rate": 6.399222429203322e-07, "loss": 0.7318, "step": 23840 }, { "epoch": 0.8421071222001951, "grad_norm": 1.716373085975647, "learning_rate": 6.396422851049077e-07, "loss": 0.7458, "step": 23841 }, { "epoch": 0.842142444003903, "grad_norm": 1.5862867832183838, "learning_rate": 6.393623843570263e-07, "loss": 0.7593, "step": 23842 }, { "epoch": 0.842177765807611, "grad_norm": 1.820333480834961, "learning_rate": 6.390825406803502e-07, "loss": 0.8195, "step": 23843 }, { "epoch": 0.8422130876113189, "grad_norm": 1.6859019994735718, "learning_rate": 6.388027540785418e-07, "loss": 0.7398, "step": 23844 }, { "epoch": 0.8422484094150268, "grad_norm": 2.1099178791046143, "learning_rate": 6.385230245552626e-07, "loss": 0.7405, "step": 23845 }, { "epoch": 0.8422837312187347, "grad_norm": 1.7393078804016113, "learning_rate": 6.382433521141756e-07, "loss": 0.7608, "step": 23846 }, { "epoch": 0.8423190530224426, "grad_norm": 1.8917416334152222, "learning_rate": 6.379637367589386e-07, "loss": 0.7624, "step": 23847 }, { "epoch": 0.8423543748261505, "grad_norm": 1.6929357051849365, "learning_rate": 6.37684178493212e-07, "loss": 0.7924, "step": 23848 }, { "epoch": 0.8423896966298584, "grad_norm": 1.602184534072876, "learning_rate": 6.374046773206555e-07, "loss": 0.7792, "step": 23849 }, { "epoch": 0.8424250184335663, "grad_norm": 1.7090821266174316, "learning_rate": 6.371252332449268e-07, "loss": 0.7404, "step": 23850 }, { "epoch": 0.8424603402372742, "grad_norm": 1.7268726825714111, "learning_rate": 6.368458462696819e-07, "loss": 0.7554, "step": 23851 }, { "epoch": 0.8424956620409821, "grad_norm": 1.6354397535324097, "learning_rate": 6.365665163985801e-07, "loss": 0.7552, "step": 23852 }, { "epoch": 0.84253098384469, "grad_norm": 1.720988392829895, "learning_rate": 6.362872436352752e-07, "loss": 0.7773, "step": 23853 }, { "epoch": 0.842566305648398, "grad_norm": 2.170463800430298, "learning_rate": 6.360080279834213e-07, "loss": 0.7763, "step": 23854 }, { "epoch": 0.8426016274521059, "grad_norm": 1.754776954650879, "learning_rate": 6.35728869446674e-07, "loss": 0.7734, "step": 23855 }, { "epoch": 0.8426369492558138, "grad_norm": 1.675950050354004, "learning_rate": 6.35449768028688e-07, "loss": 0.7695, "step": 23856 }, { "epoch": 0.8426722710595217, "grad_norm": 1.5744637250900269, "learning_rate": 6.35170723733114e-07, "loss": 0.7412, "step": 23857 }, { "epoch": 0.8427075928632296, "grad_norm": 1.8084720373153687, "learning_rate": 6.348917365636049e-07, "loss": 0.7629, "step": 23858 }, { "epoch": 0.8427429146669375, "grad_norm": 1.5989165306091309, "learning_rate": 6.346128065238122e-07, "loss": 0.749, "step": 23859 }, { "epoch": 0.8427782364706454, "grad_norm": 2.3819520473480225, "learning_rate": 6.343339336173876e-07, "loss": 0.7388, "step": 23860 }, { "epoch": 0.8428135582743532, "grad_norm": 1.6294529438018799, "learning_rate": 6.340551178479787e-07, "loss": 0.7422, "step": 23861 }, { "epoch": 0.8428488800780611, "grad_norm": 1.5295840501785278, "learning_rate": 6.337763592192358e-07, "loss": 0.7523, "step": 23862 }, { "epoch": 0.842884201881769, "grad_norm": 1.8565425872802734, "learning_rate": 6.334976577348084e-07, "loss": 0.7711, "step": 23863 }, { "epoch": 0.842919523685477, "grad_norm": 1.9242008924484253, "learning_rate": 6.332190133983418e-07, "loss": 0.781, "step": 23864 }, { "epoch": 0.8429548454891849, "grad_norm": 1.7999181747436523, "learning_rate": 6.329404262134837e-07, "loss": 0.7522, "step": 23865 }, { "epoch": 0.8429901672928928, "grad_norm": 1.6651129722595215, "learning_rate": 6.326618961838815e-07, "loss": 0.7741, "step": 23866 }, { "epoch": 0.8430254890966007, "grad_norm": 1.6896709203720093, "learning_rate": 6.323834233131781e-07, "loss": 0.7547, "step": 23867 }, { "epoch": 0.8430608109003086, "grad_norm": 2.0973241329193115, "learning_rate": 6.321050076050194e-07, "loss": 0.7445, "step": 23868 }, { "epoch": 0.8430961327040165, "grad_norm": 1.7486803531646729, "learning_rate": 6.318266490630509e-07, "loss": 0.7656, "step": 23869 }, { "epoch": 0.8431314545077244, "grad_norm": 1.7225251197814941, "learning_rate": 6.31548347690914e-07, "loss": 0.7689, "step": 23870 }, { "epoch": 0.8431667763114323, "grad_norm": 1.7073317766189575, "learning_rate": 6.312701034922497e-07, "loss": 0.7557, "step": 23871 }, { "epoch": 0.8432020981151402, "grad_norm": 1.6098185777664185, "learning_rate": 6.309919164707013e-07, "loss": 0.7729, "step": 23872 }, { "epoch": 0.8432374199188482, "grad_norm": 1.6617661714553833, "learning_rate": 6.307137866299101e-07, "loss": 0.7736, "step": 23873 }, { "epoch": 0.8432727417225561, "grad_norm": 1.7590550184249878, "learning_rate": 6.304357139735151e-07, "loss": 0.7473, "step": 23874 }, { "epoch": 0.843308063526264, "grad_norm": 2.164625406265259, "learning_rate": 6.301576985051555e-07, "loss": 0.7738, "step": 23875 }, { "epoch": 0.8433433853299719, "grad_norm": 1.7434786558151245, "learning_rate": 6.298797402284718e-07, "loss": 0.7707, "step": 23876 }, { "epoch": 0.8433787071336798, "grad_norm": 1.8032437562942505, "learning_rate": 6.296018391470998e-07, "loss": 0.7687, "step": 23877 }, { "epoch": 0.8434140289373877, "grad_norm": 1.7488163709640503, "learning_rate": 6.293239952646768e-07, "loss": 0.78, "step": 23878 }, { "epoch": 0.8434493507410956, "grad_norm": 1.5794556140899658, "learning_rate": 6.290462085848415e-07, "loss": 0.7598, "step": 23879 }, { "epoch": 0.8434846725448035, "grad_norm": 1.7614668607711792, "learning_rate": 6.287684791112258e-07, "loss": 0.7538, "step": 23880 }, { "epoch": 0.8435199943485114, "grad_norm": 1.6811387538909912, "learning_rate": 6.284908068474677e-07, "loss": 0.7574, "step": 23881 }, { "epoch": 0.8435553161522193, "grad_norm": 1.6333591938018799, "learning_rate": 6.282131917972001e-07, "loss": 0.725, "step": 23882 }, { "epoch": 0.8435906379559273, "grad_norm": 1.743408441543579, "learning_rate": 6.279356339640563e-07, "loss": 0.7576, "step": 23883 }, { "epoch": 0.8436259597596352, "grad_norm": 1.7261648178100586, "learning_rate": 6.276581333516685e-07, "loss": 0.8133, "step": 23884 }, { "epoch": 0.8436612815633431, "grad_norm": 1.662657618522644, "learning_rate": 6.273806899636703e-07, "loss": 0.7752, "step": 23885 }, { "epoch": 0.843696603367051, "grad_norm": 1.6567147970199585, "learning_rate": 6.271033038036906e-07, "loss": 0.7448, "step": 23886 }, { "epoch": 0.8437319251707588, "grad_norm": 1.5732765197753906, "learning_rate": 6.268259748753608e-07, "loss": 0.7587, "step": 23887 }, { "epoch": 0.8437672469744667, "grad_norm": 1.9201507568359375, "learning_rate": 6.26548703182312e-07, "loss": 0.728, "step": 23888 }, { "epoch": 0.8438025687781746, "grad_norm": 1.7082122564315796, "learning_rate": 6.262714887281712e-07, "loss": 0.7636, "step": 23889 }, { "epoch": 0.8438378905818825, "grad_norm": 1.73839271068573, "learning_rate": 6.259943315165662e-07, "loss": 0.7922, "step": 23890 }, { "epoch": 0.8438732123855904, "grad_norm": 1.7936242818832397, "learning_rate": 6.257172315511251e-07, "loss": 0.7332, "step": 23891 }, { "epoch": 0.8439085341892983, "grad_norm": 1.6955456733703613, "learning_rate": 6.254401888354756e-07, "loss": 0.7601, "step": 23892 }, { "epoch": 0.8439438559930063, "grad_norm": 1.6374564170837402, "learning_rate": 6.251632033732414e-07, "loss": 0.7347, "step": 23893 }, { "epoch": 0.8439791777967142, "grad_norm": 1.962952971458435, "learning_rate": 6.24886275168049e-07, "loss": 0.7904, "step": 23894 }, { "epoch": 0.8440144996004221, "grad_norm": 1.7400486469268799, "learning_rate": 6.24609404223524e-07, "loss": 0.7353, "step": 23895 }, { "epoch": 0.84404982140413, "grad_norm": 1.6622631549835205, "learning_rate": 6.243325905432868e-07, "loss": 0.7531, "step": 23896 }, { "epoch": 0.8440851432078379, "grad_norm": 1.7617161273956299, "learning_rate": 6.240558341309627e-07, "loss": 0.8104, "step": 23897 }, { "epoch": 0.8441204650115458, "grad_norm": 1.6733272075653076, "learning_rate": 6.23779134990174e-07, "loss": 0.7661, "step": 23898 }, { "epoch": 0.8441557868152537, "grad_norm": 1.785030484199524, "learning_rate": 6.235024931245404e-07, "loss": 0.7595, "step": 23899 }, { "epoch": 0.8441911086189616, "grad_norm": 1.778916358947754, "learning_rate": 6.232259085376835e-07, "loss": 0.7708, "step": 23900 }, { "epoch": 0.8442264304226695, "grad_norm": 1.558724284172058, "learning_rate": 6.229493812332244e-07, "loss": 0.74, "step": 23901 }, { "epoch": 0.8442617522263774, "grad_norm": 1.6617053747177124, "learning_rate": 6.226729112147795e-07, "loss": 0.7327, "step": 23902 }, { "epoch": 0.8442970740300854, "grad_norm": 1.825790524482727, "learning_rate": 6.223964984859692e-07, "loss": 0.7566, "step": 23903 }, { "epoch": 0.8443323958337933, "grad_norm": 1.6690691709518433, "learning_rate": 6.221201430504114e-07, "loss": 0.7433, "step": 23904 }, { "epoch": 0.8443677176375012, "grad_norm": 1.673450231552124, "learning_rate": 6.218438449117214e-07, "loss": 0.7534, "step": 23905 }, { "epoch": 0.8444030394412091, "grad_norm": 1.7254778146743774, "learning_rate": 6.215676040735158e-07, "loss": 0.7536, "step": 23906 }, { "epoch": 0.844438361244917, "grad_norm": 2.399423599243164, "learning_rate": 6.212914205394116e-07, "loss": 0.7832, "step": 23907 }, { "epoch": 0.8444736830486249, "grad_norm": 1.717786192893982, "learning_rate": 6.210152943130222e-07, "loss": 0.745, "step": 23908 }, { "epoch": 0.8445090048523328, "grad_norm": 1.6995699405670166, "learning_rate": 6.207392253979605e-07, "loss": 0.8057, "step": 23909 }, { "epoch": 0.8445443266560407, "grad_norm": 1.6146575212478638, "learning_rate": 6.2046321379784e-07, "loss": 0.7605, "step": 23910 }, { "epoch": 0.8445796484597486, "grad_norm": 1.7842211723327637, "learning_rate": 6.201872595162755e-07, "loss": 0.7709, "step": 23911 }, { "epoch": 0.8446149702634566, "grad_norm": 2.0615336894989014, "learning_rate": 6.199113625568754e-07, "loss": 0.7717, "step": 23912 }, { "epoch": 0.8446502920671644, "grad_norm": 1.0942153930664062, "learning_rate": 6.19635522923252e-07, "loss": 0.5568, "step": 23913 }, { "epoch": 0.8446856138708723, "grad_norm": 2.164649248123169, "learning_rate": 6.193597406190166e-07, "loss": 0.7339, "step": 23914 }, { "epoch": 0.8447209356745802, "grad_norm": 1.6999919414520264, "learning_rate": 6.190840156477768e-07, "loss": 0.7583, "step": 23915 }, { "epoch": 0.8447562574782881, "grad_norm": 1.7742819786071777, "learning_rate": 6.188083480131413e-07, "loss": 0.7871, "step": 23916 }, { "epoch": 0.844791579281996, "grad_norm": 1.6622214317321777, "learning_rate": 6.185327377187195e-07, "loss": 0.7627, "step": 23917 }, { "epoch": 0.8448269010857039, "grad_norm": 1.6192820072174072, "learning_rate": 6.182571847681168e-07, "loss": 0.7236, "step": 23918 }, { "epoch": 0.8448622228894118, "grad_norm": 1.823157787322998, "learning_rate": 6.179816891649409e-07, "loss": 0.7461, "step": 23919 }, { "epoch": 0.8448975446931197, "grad_norm": 0.9806815385818481, "learning_rate": 6.177062509127962e-07, "loss": 0.5598, "step": 23920 }, { "epoch": 0.8449328664968276, "grad_norm": 2.030437469482422, "learning_rate": 6.174308700152898e-07, "loss": 0.7694, "step": 23921 }, { "epoch": 0.8449681883005355, "grad_norm": 1.6051019430160522, "learning_rate": 6.171555464760232e-07, "loss": 0.7641, "step": 23922 }, { "epoch": 0.8450035101042435, "grad_norm": 1.5492061376571655, "learning_rate": 6.168802802986007e-07, "loss": 0.747, "step": 23923 }, { "epoch": 0.8450388319079514, "grad_norm": 2.001893997192383, "learning_rate": 6.166050714866262e-07, "loss": 0.7743, "step": 23924 }, { "epoch": 0.8450741537116593, "grad_norm": 1.8738574981689453, "learning_rate": 6.163299200437006e-07, "loss": 0.7369, "step": 23925 }, { "epoch": 0.8451094755153672, "grad_norm": 1.670236587524414, "learning_rate": 6.160548259734245e-07, "loss": 0.7661, "step": 23926 }, { "epoch": 0.8451447973190751, "grad_norm": 0.9569573998451233, "learning_rate": 6.157797892793993e-07, "loss": 0.5775, "step": 23927 }, { "epoch": 0.845180119122783, "grad_norm": 1.664250373840332, "learning_rate": 6.155048099652228e-07, "loss": 0.7726, "step": 23928 }, { "epoch": 0.8452154409264909, "grad_norm": 1.5857622623443604, "learning_rate": 6.152298880344954e-07, "loss": 0.7452, "step": 23929 }, { "epoch": 0.8452507627301988, "grad_norm": 1.8113268613815308, "learning_rate": 6.149550234908147e-07, "loss": 0.7998, "step": 23930 }, { "epoch": 0.8452860845339067, "grad_norm": 1.7289644479751587, "learning_rate": 6.146802163377796e-07, "loss": 0.7563, "step": 23931 }, { "epoch": 0.8453214063376147, "grad_norm": 1.727405071258545, "learning_rate": 6.144054665789845e-07, "loss": 0.7295, "step": 23932 }, { "epoch": 0.8453567281413226, "grad_norm": 1.536582112312317, "learning_rate": 6.141307742180264e-07, "loss": 0.7766, "step": 23933 }, { "epoch": 0.8453920499450305, "grad_norm": 1.77443528175354, "learning_rate": 6.138561392585013e-07, "loss": 0.7981, "step": 23934 }, { "epoch": 0.8454273717487384, "grad_norm": 2.021437406539917, "learning_rate": 6.135815617040014e-07, "loss": 0.7216, "step": 23935 }, { "epoch": 0.8454626935524463, "grad_norm": 1.7740042209625244, "learning_rate": 6.133070415581211e-07, "loss": 0.7259, "step": 23936 }, { "epoch": 0.8454980153561542, "grad_norm": 1.5669454336166382, "learning_rate": 6.130325788244551e-07, "loss": 0.7348, "step": 23937 }, { "epoch": 0.8455333371598621, "grad_norm": 2.7000997066497803, "learning_rate": 6.127581735065929e-07, "loss": 0.7557, "step": 23938 }, { "epoch": 0.84556865896357, "grad_norm": 2.0283780097961426, "learning_rate": 6.124838256081272e-07, "loss": 0.7851, "step": 23939 }, { "epoch": 0.8456039807672778, "grad_norm": 1.7515718936920166, "learning_rate": 6.122095351326491e-07, "loss": 0.7939, "step": 23940 }, { "epoch": 0.8456393025709857, "grad_norm": 2.019071340560913, "learning_rate": 6.119353020837471e-07, "loss": 0.773, "step": 23941 }, { "epoch": 0.8456746243746937, "grad_norm": 1.6300159692764282, "learning_rate": 6.116611264650107e-07, "loss": 0.7417, "step": 23942 }, { "epoch": 0.8457099461784016, "grad_norm": 1.7493512630462646, "learning_rate": 6.113870082800299e-07, "loss": 0.7609, "step": 23943 }, { "epoch": 0.8457452679821095, "grad_norm": 1.6578071117401123, "learning_rate": 6.111129475323907e-07, "loss": 0.7582, "step": 23944 }, { "epoch": 0.8457805897858174, "grad_norm": 1.604878306388855, "learning_rate": 6.10838944225679e-07, "loss": 0.752, "step": 23945 }, { "epoch": 0.8458159115895253, "grad_norm": 1.6066137552261353, "learning_rate": 6.105649983634826e-07, "loss": 0.7469, "step": 23946 }, { "epoch": 0.8458512333932332, "grad_norm": 1.6577227115631104, "learning_rate": 6.102911099493875e-07, "loss": 0.7605, "step": 23947 }, { "epoch": 0.8458865551969411, "grad_norm": 1.7407073974609375, "learning_rate": 6.100172789869757e-07, "loss": 0.7694, "step": 23948 }, { "epoch": 0.845921877000649, "grad_norm": 1.8026844263076782, "learning_rate": 6.097435054798329e-07, "loss": 0.7709, "step": 23949 }, { "epoch": 0.8459571988043569, "grad_norm": 1.8332860469818115, "learning_rate": 6.094697894315427e-07, "loss": 0.7826, "step": 23950 }, { "epoch": 0.8459925206080648, "grad_norm": 1.8102788925170898, "learning_rate": 6.091961308456851e-07, "loss": 0.7921, "step": 23951 }, { "epoch": 0.8460278424117728, "grad_norm": 1.7113531827926636, "learning_rate": 6.089225297258439e-07, "loss": 0.7705, "step": 23952 }, { "epoch": 0.8460631642154807, "grad_norm": 1.803019404411316, "learning_rate": 6.086489860755995e-07, "loss": 0.7576, "step": 23953 }, { "epoch": 0.8460984860191886, "grad_norm": 1.7074577808380127, "learning_rate": 6.083754998985314e-07, "loss": 0.7639, "step": 23954 }, { "epoch": 0.8461338078228965, "grad_norm": 1.9390801191329956, "learning_rate": 6.081020711982189e-07, "loss": 0.7668, "step": 23955 }, { "epoch": 0.8461691296266044, "grad_norm": 1.5775796175003052, "learning_rate": 6.078286999782418e-07, "loss": 0.7643, "step": 23956 }, { "epoch": 0.8462044514303123, "grad_norm": 1.6462303400039673, "learning_rate": 6.075553862421757e-07, "loss": 0.7429, "step": 23957 }, { "epoch": 0.8462397732340202, "grad_norm": 1.584161639213562, "learning_rate": 6.072821299935994e-07, "loss": 0.7387, "step": 23958 }, { "epoch": 0.8462750950377281, "grad_norm": 1.5802812576293945, "learning_rate": 6.0700893123609e-07, "loss": 0.7506, "step": 23959 }, { "epoch": 0.846310416841436, "grad_norm": 1.8264957666397095, "learning_rate": 6.067357899732207e-07, "loss": 0.752, "step": 23960 }, { "epoch": 0.846345738645144, "grad_norm": 1.6059538125991821, "learning_rate": 6.064627062085676e-07, "loss": 0.7724, "step": 23961 }, { "epoch": 0.8463810604488519, "grad_norm": 1.817920446395874, "learning_rate": 6.061896799457057e-07, "loss": 0.7431, "step": 23962 }, { "epoch": 0.8464163822525598, "grad_norm": 1.7626688480377197, "learning_rate": 6.059167111882075e-07, "loss": 0.7557, "step": 23963 }, { "epoch": 0.8464517040562677, "grad_norm": 1.6821553707122803, "learning_rate": 6.056437999396442e-07, "loss": 0.7633, "step": 23964 }, { "epoch": 0.8464870258599756, "grad_norm": 2.9246909618377686, "learning_rate": 6.053709462035889e-07, "loss": 0.7428, "step": 23965 }, { "epoch": 0.8465223476636834, "grad_norm": 1.9455087184906006, "learning_rate": 6.050981499836134e-07, "loss": 0.7466, "step": 23966 }, { "epoch": 0.8465576694673913, "grad_norm": 1.790334701538086, "learning_rate": 6.048254112832863e-07, "loss": 0.7319, "step": 23967 }, { "epoch": 0.8465929912710992, "grad_norm": 1.7994409799575806, "learning_rate": 6.045527301061782e-07, "loss": 0.75, "step": 23968 }, { "epoch": 0.8466283130748071, "grad_norm": 1.7108807563781738, "learning_rate": 6.042801064558584e-07, "loss": 0.7616, "step": 23969 }, { "epoch": 0.846663634878515, "grad_norm": 1.7914637327194214, "learning_rate": 6.04007540335893e-07, "loss": 0.7634, "step": 23970 }, { "epoch": 0.846698956682223, "grad_norm": 1.8425447940826416, "learning_rate": 6.037350317498508e-07, "loss": 0.7619, "step": 23971 }, { "epoch": 0.8467342784859309, "grad_norm": 1.572791337966919, "learning_rate": 6.034625807012995e-07, "loss": 0.7466, "step": 23972 }, { "epoch": 0.8467696002896388, "grad_norm": 1.5248457193374634, "learning_rate": 6.031901871938023e-07, "loss": 0.7242, "step": 23973 }, { "epoch": 0.8468049220933467, "grad_norm": 2.143813371658325, "learning_rate": 6.029178512309253e-07, "loss": 0.7159, "step": 23974 }, { "epoch": 0.8468402438970546, "grad_norm": 1.5793291330337524, "learning_rate": 6.02645572816234e-07, "loss": 0.7503, "step": 23975 }, { "epoch": 0.8468755657007625, "grad_norm": 2.368319272994995, "learning_rate": 6.023733519532893e-07, "loss": 0.7642, "step": 23976 }, { "epoch": 0.8469108875044704, "grad_norm": 0.9745956659317017, "learning_rate": 6.021011886456563e-07, "loss": 0.5518, "step": 23977 }, { "epoch": 0.8469462093081783, "grad_norm": 1.7144089937210083, "learning_rate": 6.018290828968954e-07, "loss": 0.7706, "step": 23978 }, { "epoch": 0.8469815311118862, "grad_norm": 1.744006633758545, "learning_rate": 6.015570347105704e-07, "loss": 0.79, "step": 23979 }, { "epoch": 0.8470168529155941, "grad_norm": 2.2899329662323, "learning_rate": 6.01285044090239e-07, "loss": 0.7484, "step": 23980 }, { "epoch": 0.847052174719302, "grad_norm": 2.2811498641967773, "learning_rate": 6.010131110394629e-07, "loss": 0.7603, "step": 23981 }, { "epoch": 0.84708749652301, "grad_norm": 1.718321681022644, "learning_rate": 6.007412355618003e-07, "loss": 0.7922, "step": 23982 }, { "epoch": 0.8471228183267179, "grad_norm": 1.6748337745666504, "learning_rate": 6.004694176608083e-07, "loss": 0.7498, "step": 23983 }, { "epoch": 0.8471581401304258, "grad_norm": 1.7656368017196655, "learning_rate": 6.001976573400459e-07, "loss": 0.7415, "step": 23984 }, { "epoch": 0.8471934619341337, "grad_norm": 2.0174455642700195, "learning_rate": 5.9992595460307e-07, "loss": 0.7641, "step": 23985 }, { "epoch": 0.8472287837378416, "grad_norm": 2.2767436504364014, "learning_rate": 5.996543094534357e-07, "loss": 0.7882, "step": 23986 }, { "epoch": 0.8472641055415495, "grad_norm": 1.6868268251419067, "learning_rate": 5.993827218946985e-07, "loss": 0.7702, "step": 23987 }, { "epoch": 0.8472994273452574, "grad_norm": 2.0922799110412598, "learning_rate": 5.991111919304127e-07, "loss": 0.7539, "step": 23988 }, { "epoch": 0.8473347491489653, "grad_norm": 1.667820930480957, "learning_rate": 5.988397195641332e-07, "loss": 0.7894, "step": 23989 }, { "epoch": 0.8473700709526732, "grad_norm": 1.8419214487075806, "learning_rate": 5.985683047994118e-07, "loss": 0.7641, "step": 23990 }, { "epoch": 0.8474053927563812, "grad_norm": 1.7625792026519775, "learning_rate": 5.982969476398004e-07, "loss": 0.7452, "step": 23991 }, { "epoch": 0.847440714560089, "grad_norm": 1.725433349609375, "learning_rate": 5.980256480888525e-07, "loss": 0.7992, "step": 23992 }, { "epoch": 0.8474760363637969, "grad_norm": 1.7753093242645264, "learning_rate": 5.977544061501162e-07, "loss": 0.7744, "step": 23993 }, { "epoch": 0.8475113581675048, "grad_norm": 1.6524192094802856, "learning_rate": 5.974832218271426e-07, "loss": 0.749, "step": 23994 }, { "epoch": 0.8475466799712127, "grad_norm": 1.8344361782073975, "learning_rate": 5.972120951234822e-07, "loss": 0.7763, "step": 23995 }, { "epoch": 0.8475820017749206, "grad_norm": 2.3300442695617676, "learning_rate": 5.969410260426812e-07, "loss": 0.7199, "step": 23996 }, { "epoch": 0.8476173235786285, "grad_norm": 1.56271493434906, "learning_rate": 5.966700145882886e-07, "loss": 0.7609, "step": 23997 }, { "epoch": 0.8476526453823364, "grad_norm": 1.6431788206100464, "learning_rate": 5.96399060763852e-07, "loss": 0.7642, "step": 23998 }, { "epoch": 0.8476879671860443, "grad_norm": 1.6751500368118286, "learning_rate": 5.961281645729167e-07, "loss": 0.798, "step": 23999 }, { "epoch": 0.8477232889897522, "grad_norm": 1.6552420854568481, "learning_rate": 5.95857326019027e-07, "loss": 0.7565, "step": 24000 }, { "epoch": 0.8477586107934602, "grad_norm": 1.9051592350006104, "learning_rate": 5.955865451057285e-07, "loss": 0.778, "step": 24001 }, { "epoch": 0.8477939325971681, "grad_norm": 1.6310791969299316, "learning_rate": 5.953158218365667e-07, "loss": 0.7563, "step": 24002 }, { "epoch": 0.847829254400876, "grad_norm": 1.9264737367630005, "learning_rate": 5.950451562150828e-07, "loss": 0.807, "step": 24003 }, { "epoch": 0.8478645762045839, "grad_norm": 1.6648023128509521, "learning_rate": 5.947745482448192e-07, "loss": 0.7108, "step": 24004 }, { "epoch": 0.8478998980082918, "grad_norm": 2.0459911823272705, "learning_rate": 5.945039979293193e-07, "loss": 0.7514, "step": 24005 }, { "epoch": 0.8479352198119997, "grad_norm": 1.6087251901626587, "learning_rate": 5.942335052721226e-07, "loss": 0.7378, "step": 24006 }, { "epoch": 0.8479705416157076, "grad_norm": 1.6404613256454468, "learning_rate": 5.93963070276769e-07, "loss": 0.751, "step": 24007 }, { "epoch": 0.8480058634194155, "grad_norm": 1.693758249282837, "learning_rate": 5.936926929467995e-07, "loss": 0.7495, "step": 24008 }, { "epoch": 0.8480411852231234, "grad_norm": 2.7171101570129395, "learning_rate": 5.934223732857509e-07, "loss": 0.7754, "step": 24009 }, { "epoch": 0.8480765070268313, "grad_norm": 1.9496724605560303, "learning_rate": 5.931521112971616e-07, "loss": 0.7949, "step": 24010 }, { "epoch": 0.8481118288305393, "grad_norm": 1.6413660049438477, "learning_rate": 5.928819069845704e-07, "loss": 0.7633, "step": 24011 }, { "epoch": 0.8481471506342472, "grad_norm": 1.711199402809143, "learning_rate": 5.926117603515113e-07, "loss": 0.7301, "step": 24012 }, { "epoch": 0.8481824724379551, "grad_norm": 1.7370226383209229, "learning_rate": 5.923416714015212e-07, "loss": 0.7691, "step": 24013 }, { "epoch": 0.848217794241663, "grad_norm": 1.7945207357406616, "learning_rate": 5.920716401381355e-07, "loss": 0.7379, "step": 24014 }, { "epoch": 0.8482531160453709, "grad_norm": 1.6012247800827026, "learning_rate": 5.918016665648868e-07, "loss": 0.7122, "step": 24015 }, { "epoch": 0.8482884378490788, "grad_norm": 1.8809274435043335, "learning_rate": 5.915317506853091e-07, "loss": 0.7624, "step": 24016 }, { "epoch": 0.8483237596527867, "grad_norm": 1.6804157495498657, "learning_rate": 5.912618925029357e-07, "loss": 0.7571, "step": 24017 }, { "epoch": 0.8483590814564945, "grad_norm": 1.81935715675354, "learning_rate": 5.909920920212986e-07, "loss": 0.7984, "step": 24018 }, { "epoch": 0.8483944032602024, "grad_norm": 1.79244065284729, "learning_rate": 5.907223492439268e-07, "loss": 0.7446, "step": 24019 }, { "epoch": 0.8484297250639103, "grad_norm": 1.6892063617706299, "learning_rate": 5.904526641743519e-07, "loss": 0.7909, "step": 24020 }, { "epoch": 0.8484650468676183, "grad_norm": 1.7128746509552002, "learning_rate": 5.901830368161049e-07, "loss": 0.7748, "step": 24021 }, { "epoch": 0.8485003686713262, "grad_norm": 1.7558292150497437, "learning_rate": 5.899134671727124e-07, "loss": 0.7953, "step": 24022 }, { "epoch": 0.8485356904750341, "grad_norm": 1.6933338642120361, "learning_rate": 5.896439552477035e-07, "loss": 0.7784, "step": 24023 }, { "epoch": 0.848571012278742, "grad_norm": 1.7521449327468872, "learning_rate": 5.893745010446061e-07, "loss": 0.7422, "step": 24024 }, { "epoch": 0.8486063340824499, "grad_norm": 1.6759114265441895, "learning_rate": 5.891051045669455e-07, "loss": 0.792, "step": 24025 }, { "epoch": 0.8486416558861578, "grad_norm": 1.708190679550171, "learning_rate": 5.888357658182481e-07, "loss": 0.7635, "step": 24026 }, { "epoch": 0.8486769776898657, "grad_norm": 1.682286024093628, "learning_rate": 5.885664848020394e-07, "loss": 0.7484, "step": 24027 }, { "epoch": 0.8487122994935736, "grad_norm": 1.6039845943450928, "learning_rate": 5.88297261521843e-07, "loss": 0.7031, "step": 24028 }, { "epoch": 0.8487476212972815, "grad_norm": 1.6938685178756714, "learning_rate": 5.880280959811824e-07, "loss": 0.745, "step": 24029 }, { "epoch": 0.8487829431009895, "grad_norm": 1.5730459690093994, "learning_rate": 5.877589881835821e-07, "loss": 0.7522, "step": 24030 }, { "epoch": 0.8488182649046974, "grad_norm": 1.971972107887268, "learning_rate": 5.874899381325611e-07, "loss": 0.7544, "step": 24031 }, { "epoch": 0.8488535867084053, "grad_norm": 1.7446660995483398, "learning_rate": 5.87220945831643e-07, "loss": 0.7672, "step": 24032 }, { "epoch": 0.8488889085121132, "grad_norm": 1.9711576700210571, "learning_rate": 5.869520112843479e-07, "loss": 0.7841, "step": 24033 }, { "epoch": 0.8489242303158211, "grad_norm": 2.380598783493042, "learning_rate": 5.866831344941948e-07, "loss": 0.7444, "step": 24034 }, { "epoch": 0.848959552119529, "grad_norm": 1.7024145126342773, "learning_rate": 5.864143154647034e-07, "loss": 0.7707, "step": 24035 }, { "epoch": 0.8489948739232369, "grad_norm": 1.6175062656402588, "learning_rate": 5.861455541993927e-07, "loss": 0.7626, "step": 24036 }, { "epoch": 0.8490301957269448, "grad_norm": 1.7095540761947632, "learning_rate": 5.858768507017787e-07, "loss": 0.7324, "step": 24037 }, { "epoch": 0.8490655175306527, "grad_norm": 1.6964036226272583, "learning_rate": 5.856082049753781e-07, "loss": 0.7302, "step": 24038 }, { "epoch": 0.8491008393343606, "grad_norm": 1.6647580862045288, "learning_rate": 5.853396170237075e-07, "loss": 0.7385, "step": 24039 }, { "epoch": 0.8491361611380686, "grad_norm": 2.103069543838501, "learning_rate": 5.850710868502835e-07, "loss": 0.7576, "step": 24040 }, { "epoch": 0.8491714829417765, "grad_norm": 1.7477737665176392, "learning_rate": 5.848026144586178e-07, "loss": 0.7856, "step": 24041 }, { "epoch": 0.8492068047454844, "grad_norm": 1.7736724615097046, "learning_rate": 5.845341998522253e-07, "loss": 0.7648, "step": 24042 }, { "epoch": 0.8492421265491923, "grad_norm": 1.653801441192627, "learning_rate": 5.842658430346204e-07, "loss": 0.7863, "step": 24043 }, { "epoch": 0.8492774483529001, "grad_norm": 1.664368748664856, "learning_rate": 5.839975440093132e-07, "loss": 0.777, "step": 24044 }, { "epoch": 0.849312770156608, "grad_norm": 1.5949212312698364, "learning_rate": 5.837293027798163e-07, "loss": 0.7604, "step": 24045 }, { "epoch": 0.8493480919603159, "grad_norm": 1.7188308238983154, "learning_rate": 5.834611193496397e-07, "loss": 0.7437, "step": 24046 }, { "epoch": 0.8493834137640238, "grad_norm": 1.9272112846374512, "learning_rate": 5.831929937222946e-07, "loss": 0.7856, "step": 24047 }, { "epoch": 0.8494187355677317, "grad_norm": 1.5087865591049194, "learning_rate": 5.829249259012886e-07, "loss": 0.7589, "step": 24048 }, { "epoch": 0.8494540573714396, "grad_norm": 1.5916972160339355, "learning_rate": 5.826569158901308e-07, "loss": 0.7492, "step": 24049 }, { "epoch": 0.8494893791751476, "grad_norm": 1.516743779182434, "learning_rate": 5.823889636923297e-07, "loss": 0.752, "step": 24050 }, { "epoch": 0.8495247009788555, "grad_norm": 1.7288821935653687, "learning_rate": 5.82121069311391e-07, "loss": 0.7252, "step": 24051 }, { "epoch": 0.8495600227825634, "grad_norm": 1.6609584093093872, "learning_rate": 5.818532327508209e-07, "loss": 0.7764, "step": 24052 }, { "epoch": 0.8495953445862713, "grad_norm": 1.9520996809005737, "learning_rate": 5.815854540141258e-07, "loss": 0.7555, "step": 24053 }, { "epoch": 0.8496306663899792, "grad_norm": 1.6504735946655273, "learning_rate": 5.813177331048092e-07, "loss": 0.7824, "step": 24054 }, { "epoch": 0.8496659881936871, "grad_norm": 1.8038430213928223, "learning_rate": 5.810500700263766e-07, "loss": 0.7772, "step": 24055 }, { "epoch": 0.849701309997395, "grad_norm": 1.6586476564407349, "learning_rate": 5.807824647823285e-07, "loss": 0.7504, "step": 24056 }, { "epoch": 0.8497366318011029, "grad_norm": 1.865115761756897, "learning_rate": 5.805149173761704e-07, "loss": 0.766, "step": 24057 }, { "epoch": 0.8497719536048108, "grad_norm": 1.763131022453308, "learning_rate": 5.802474278114007e-07, "loss": 0.7536, "step": 24058 }, { "epoch": 0.8498072754085187, "grad_norm": 1.6101945638656616, "learning_rate": 5.79979996091522e-07, "loss": 0.7353, "step": 24059 }, { "epoch": 0.8498425972122267, "grad_norm": 1.5800607204437256, "learning_rate": 5.797126222200355e-07, "loss": 0.7632, "step": 24060 }, { "epoch": 0.8498779190159346, "grad_norm": 0.9707474708557129, "learning_rate": 5.794453062004379e-07, "loss": 0.5683, "step": 24061 }, { "epoch": 0.8499132408196425, "grad_norm": 1.5531541109085083, "learning_rate": 5.791780480362297e-07, "loss": 0.7189, "step": 24062 }, { "epoch": 0.8499485626233504, "grad_norm": 1.760329246520996, "learning_rate": 5.789108477309086e-07, "loss": 0.7666, "step": 24063 }, { "epoch": 0.8499838844270583, "grad_norm": 1.7258000373840332, "learning_rate": 5.786437052879707e-07, "loss": 0.775, "step": 24064 }, { "epoch": 0.8500192062307662, "grad_norm": 1.6988704204559326, "learning_rate": 5.783766207109126e-07, "loss": 0.7531, "step": 24065 }, { "epoch": 0.8500545280344741, "grad_norm": 1.7492573261260986, "learning_rate": 5.781095940032311e-07, "loss": 0.7485, "step": 24066 }, { "epoch": 0.850089849838182, "grad_norm": 1.975711703300476, "learning_rate": 5.778426251684194e-07, "loss": 0.7168, "step": 24067 }, { "epoch": 0.8501251716418899, "grad_norm": 1.7138456106185913, "learning_rate": 5.775757142099714e-07, "loss": 0.7675, "step": 24068 }, { "epoch": 0.8501604934455979, "grad_norm": 2.1112263202667236, "learning_rate": 5.773088611313827e-07, "loss": 0.7753, "step": 24069 }, { "epoch": 0.8501958152493057, "grad_norm": 1.7310140132904053, "learning_rate": 5.770420659361431e-07, "loss": 0.7576, "step": 24070 }, { "epoch": 0.8502311370530136, "grad_norm": 1.6362957954406738, "learning_rate": 5.767753286277455e-07, "loss": 0.7345, "step": 24071 }, { "epoch": 0.8502664588567215, "grad_norm": 2.02616548538208, "learning_rate": 5.765086492096822e-07, "loss": 0.7815, "step": 24072 }, { "epoch": 0.8503017806604294, "grad_norm": 1.658137321472168, "learning_rate": 5.762420276854419e-07, "loss": 0.736, "step": 24073 }, { "epoch": 0.8503371024641373, "grad_norm": 1.996479868888855, "learning_rate": 5.75975464058513e-07, "loss": 0.7466, "step": 24074 }, { "epoch": 0.8503724242678452, "grad_norm": 1.897236943244934, "learning_rate": 5.75708958332386e-07, "loss": 0.7966, "step": 24075 }, { "epoch": 0.8504077460715531, "grad_norm": 3.3464252948760986, "learning_rate": 5.754425105105499e-07, "loss": 0.7827, "step": 24076 }, { "epoch": 0.850443067875261, "grad_norm": 1.8369207382202148, "learning_rate": 5.75176120596489e-07, "loss": 0.7653, "step": 24077 }, { "epoch": 0.8504783896789689, "grad_norm": 1.625123143196106, "learning_rate": 5.74909788593691e-07, "loss": 0.772, "step": 24078 }, { "epoch": 0.8505137114826768, "grad_norm": 1.5810141563415527, "learning_rate": 5.746435145056434e-07, "loss": 0.7575, "step": 24079 }, { "epoch": 0.8505490332863848, "grad_norm": 2.998770236968994, "learning_rate": 5.743772983358281e-07, "loss": 0.7743, "step": 24080 }, { "epoch": 0.8505843550900927, "grad_norm": 1.625209093093872, "learning_rate": 5.741111400877314e-07, "loss": 0.782, "step": 24081 }, { "epoch": 0.8506196768938006, "grad_norm": 1.7524882555007935, "learning_rate": 5.738450397648365e-07, "loss": 0.76, "step": 24082 }, { "epoch": 0.8506549986975085, "grad_norm": 1.7915794849395752, "learning_rate": 5.735789973706251e-07, "loss": 0.751, "step": 24083 }, { "epoch": 0.8506903205012164, "grad_norm": 2.8045284748077393, "learning_rate": 5.733130129085796e-07, "loss": 0.7857, "step": 24084 }, { "epoch": 0.8507256423049243, "grad_norm": 1.5057810544967651, "learning_rate": 5.730470863821824e-07, "loss": 0.7629, "step": 24085 }, { "epoch": 0.8507609641086322, "grad_norm": 7.738064289093018, "learning_rate": 5.727812177949116e-07, "loss": 0.7647, "step": 24086 }, { "epoch": 0.8507962859123401, "grad_norm": 1.7270004749298096, "learning_rate": 5.725154071502481e-07, "loss": 0.7629, "step": 24087 }, { "epoch": 0.850831607716048, "grad_norm": 1.7918968200683594, "learning_rate": 5.722496544516714e-07, "loss": 0.778, "step": 24088 }, { "epoch": 0.850866929519756, "grad_norm": 1.7790205478668213, "learning_rate": 5.719839597026583e-07, "loss": 0.7724, "step": 24089 }, { "epoch": 0.8509022513234639, "grad_norm": 1.7510302066802979, "learning_rate": 5.717183229066863e-07, "loss": 0.7478, "step": 24090 }, { "epoch": 0.8509375731271718, "grad_norm": 1.829211950302124, "learning_rate": 5.71452744067234e-07, "loss": 0.7648, "step": 24091 }, { "epoch": 0.8509728949308797, "grad_norm": 1.9732160568237305, "learning_rate": 5.711872231877752e-07, "loss": 0.756, "step": 24092 }, { "epoch": 0.8510082167345876, "grad_norm": 1.5959413051605225, "learning_rate": 5.709217602717848e-07, "loss": 0.7551, "step": 24093 }, { "epoch": 0.8510435385382955, "grad_norm": 1.8529645204544067, "learning_rate": 5.706563553227379e-07, "loss": 0.7516, "step": 24094 }, { "epoch": 0.8510788603420034, "grad_norm": 1.8289964199066162, "learning_rate": 5.703910083441089e-07, "loss": 0.7996, "step": 24095 }, { "epoch": 0.8511141821457112, "grad_norm": 1.7368618249893188, "learning_rate": 5.701257193393684e-07, "loss": 0.7856, "step": 24096 }, { "epoch": 0.8511495039494191, "grad_norm": 1.7408767938613892, "learning_rate": 5.698604883119902e-07, "loss": 0.77, "step": 24097 }, { "epoch": 0.851184825753127, "grad_norm": 1.6500775814056396, "learning_rate": 5.69595315265446e-07, "loss": 0.7301, "step": 24098 }, { "epoch": 0.851220147556835, "grad_norm": 1.6370623111724854, "learning_rate": 5.693302002032042e-07, "loss": 0.7392, "step": 24099 }, { "epoch": 0.8512554693605429, "grad_norm": 2.5903401374816895, "learning_rate": 5.690651431287364e-07, "loss": 0.7643, "step": 24100 }, { "epoch": 0.8512907911642508, "grad_norm": 1.5906490087509155, "learning_rate": 5.688001440455116e-07, "loss": 0.7499, "step": 24101 }, { "epoch": 0.8513261129679587, "grad_norm": 1.7634104490280151, "learning_rate": 5.685352029569968e-07, "loss": 0.7283, "step": 24102 }, { "epoch": 0.8513614347716666, "grad_norm": 1.6709281206130981, "learning_rate": 5.6827031986666e-07, "loss": 0.7583, "step": 24103 }, { "epoch": 0.8513967565753745, "grad_norm": 1.7860612869262695, "learning_rate": 5.680054947779684e-07, "loss": 0.7942, "step": 24104 }, { "epoch": 0.8514320783790824, "grad_norm": 2.766833782196045, "learning_rate": 5.677407276943886e-07, "loss": 0.7717, "step": 24105 }, { "epoch": 0.8514674001827903, "grad_norm": 3.6963703632354736, "learning_rate": 5.674760186193845e-07, "loss": 0.7523, "step": 24106 }, { "epoch": 0.8515027219864982, "grad_norm": 1.8344604969024658, "learning_rate": 5.672113675564206e-07, "loss": 0.7493, "step": 24107 }, { "epoch": 0.8515380437902061, "grad_norm": 1.5634509325027466, "learning_rate": 5.669467745089619e-07, "loss": 0.7432, "step": 24108 }, { "epoch": 0.8515733655939141, "grad_norm": 1.6488858461380005, "learning_rate": 5.666822394804694e-07, "loss": 0.7657, "step": 24109 }, { "epoch": 0.851608687397622, "grad_norm": 1.600111961364746, "learning_rate": 5.664177624744077e-07, "loss": 0.7699, "step": 24110 }, { "epoch": 0.8516440092013299, "grad_norm": 1.78163743019104, "learning_rate": 5.661533434942373e-07, "loss": 0.7783, "step": 24111 }, { "epoch": 0.8516793310050378, "grad_norm": 1.5365114212036133, "learning_rate": 5.658889825434166e-07, "loss": 0.7166, "step": 24112 }, { "epoch": 0.8517146528087457, "grad_norm": 1.7930198907852173, "learning_rate": 5.656246796254078e-07, "loss": 0.7324, "step": 24113 }, { "epoch": 0.8517499746124536, "grad_norm": 2.0054028034210205, "learning_rate": 5.653604347436698e-07, "loss": 0.7208, "step": 24114 }, { "epoch": 0.8517852964161615, "grad_norm": 2.0165517330169678, "learning_rate": 5.65096247901662e-07, "loss": 0.7764, "step": 24115 }, { "epoch": 0.8518206182198694, "grad_norm": 1.8528330326080322, "learning_rate": 5.648321191028395e-07, "loss": 0.7634, "step": 24116 }, { "epoch": 0.8518559400235773, "grad_norm": 1.779543161392212, "learning_rate": 5.645680483506599e-07, "loss": 0.7649, "step": 24117 }, { "epoch": 0.8518912618272853, "grad_norm": 1.7092736959457397, "learning_rate": 5.643040356485819e-07, "loss": 0.7809, "step": 24118 }, { "epoch": 0.8519265836309932, "grad_norm": 1.855991244316101, "learning_rate": 5.640400810000573e-07, "loss": 0.7703, "step": 24119 }, { "epoch": 0.8519619054347011, "grad_norm": 1.6488068103790283, "learning_rate": 5.637761844085426e-07, "loss": 0.7492, "step": 24120 }, { "epoch": 0.851997227238409, "grad_norm": 2.669297695159912, "learning_rate": 5.635123458774921e-07, "loss": 0.743, "step": 24121 }, { "epoch": 0.8520325490421168, "grad_norm": 1.7378292083740234, "learning_rate": 5.63248565410357e-07, "loss": 0.7754, "step": 24122 }, { "epoch": 0.8520678708458247, "grad_norm": 1.5579365491867065, "learning_rate": 5.629848430105906e-07, "loss": 0.7468, "step": 24123 }, { "epoch": 0.8521031926495326, "grad_norm": 1.9027187824249268, "learning_rate": 5.627211786816455e-07, "loss": 0.7837, "step": 24124 }, { "epoch": 0.8521385144532405, "grad_norm": 1.8687493801116943, "learning_rate": 5.624575724269704e-07, "loss": 0.7763, "step": 24125 }, { "epoch": 0.8521738362569484, "grad_norm": 1.7043853998184204, "learning_rate": 5.621940242500168e-07, "loss": 0.7541, "step": 24126 }, { "epoch": 0.8522091580606563, "grad_norm": 1.5628206729888916, "learning_rate": 5.619305341542347e-07, "loss": 0.7342, "step": 24127 }, { "epoch": 0.8522444798643642, "grad_norm": 1.779311180114746, "learning_rate": 5.616671021430697e-07, "loss": 0.7339, "step": 24128 }, { "epoch": 0.8522798016680722, "grad_norm": 2.232335090637207, "learning_rate": 5.61403728219973e-07, "loss": 0.7407, "step": 24129 }, { "epoch": 0.8523151234717801, "grad_norm": 1.6395865678787231, "learning_rate": 5.61140412388389e-07, "loss": 0.7069, "step": 24130 }, { "epoch": 0.852350445275488, "grad_norm": 1.7448616027832031, "learning_rate": 5.608771546517655e-07, "loss": 0.7465, "step": 24131 }, { "epoch": 0.8523857670791959, "grad_norm": 1.8125146627426147, "learning_rate": 5.606139550135465e-07, "loss": 0.7755, "step": 24132 }, { "epoch": 0.8524210888829038, "grad_norm": 1.6625237464904785, "learning_rate": 5.603508134771779e-07, "loss": 0.7346, "step": 24133 }, { "epoch": 0.8524564106866117, "grad_norm": 2.112217903137207, "learning_rate": 5.600877300461038e-07, "loss": 0.7669, "step": 24134 }, { "epoch": 0.8524917324903196, "grad_norm": 1.7716008424758911, "learning_rate": 5.598247047237659e-07, "loss": 0.781, "step": 24135 }, { "epoch": 0.8525270542940275, "grad_norm": 1.8412859439849854, "learning_rate": 5.595617375136081e-07, "loss": 0.7773, "step": 24136 }, { "epoch": 0.8525623760977354, "grad_norm": 1.6619945764541626, "learning_rate": 5.592988284190721e-07, "loss": 0.7432, "step": 24137 }, { "epoch": 0.8525976979014434, "grad_norm": 1.7429742813110352, "learning_rate": 5.590359774435972e-07, "loss": 0.7699, "step": 24138 }, { "epoch": 0.8526330197051513, "grad_norm": 1.7825456857681274, "learning_rate": 5.587731845906247e-07, "loss": 0.7603, "step": 24139 }, { "epoch": 0.8526683415088592, "grad_norm": 1.7664034366607666, "learning_rate": 5.585104498635951e-07, "loss": 0.7805, "step": 24140 }, { "epoch": 0.8527036633125671, "grad_norm": 1.5615345239639282, "learning_rate": 5.582477732659447e-07, "loss": 0.7928, "step": 24141 }, { "epoch": 0.852738985116275, "grad_norm": 1.8019564151763916, "learning_rate": 5.579851548011123e-07, "loss": 0.7408, "step": 24142 }, { "epoch": 0.8527743069199829, "grad_norm": 1.804606318473816, "learning_rate": 5.577225944725367e-07, "loss": 0.7517, "step": 24143 }, { "epoch": 0.8528096287236908, "grad_norm": 2.4867842197418213, "learning_rate": 5.574600922836515e-07, "loss": 0.7964, "step": 24144 }, { "epoch": 0.8528449505273987, "grad_norm": 1.6073552370071411, "learning_rate": 5.571976482378932e-07, "loss": 0.7615, "step": 24145 }, { "epoch": 0.8528802723311066, "grad_norm": 1.6975376605987549, "learning_rate": 5.569352623386981e-07, "loss": 0.7553, "step": 24146 }, { "epoch": 0.8529155941348145, "grad_norm": 1.7681505680084229, "learning_rate": 5.566729345894994e-07, "loss": 0.7674, "step": 24147 }, { "epoch": 0.8529509159385223, "grad_norm": 0.909318208694458, "learning_rate": 5.564106649937284e-07, "loss": 0.5798, "step": 24148 }, { "epoch": 0.8529862377422303, "grad_norm": 0.9221135377883911, "learning_rate": 5.561484535548201e-07, "loss": 0.532, "step": 24149 }, { "epoch": 0.8530215595459382, "grad_norm": 1.550436019897461, "learning_rate": 5.55886300276206e-07, "loss": 0.7531, "step": 24150 }, { "epoch": 0.8530568813496461, "grad_norm": 1.8069132566452026, "learning_rate": 5.556242051613154e-07, "loss": 0.7774, "step": 24151 }, { "epoch": 0.853092203153354, "grad_norm": 2.478665351867676, "learning_rate": 5.553621682135801e-07, "loss": 0.729, "step": 24152 }, { "epoch": 0.8531275249570619, "grad_norm": 1.6133474111557007, "learning_rate": 5.551001894364305e-07, "loss": 0.7907, "step": 24153 }, { "epoch": 0.8531628467607698, "grad_norm": 1.7523353099822998, "learning_rate": 5.548382688332926e-07, "loss": 0.8041, "step": 24154 }, { "epoch": 0.8531981685644777, "grad_norm": 1.8820321559906006, "learning_rate": 5.545764064075959e-07, "loss": 0.7773, "step": 24155 }, { "epoch": 0.8532334903681856, "grad_norm": 1.7447758913040161, "learning_rate": 5.543146021627688e-07, "loss": 0.7329, "step": 24156 }, { "epoch": 0.8532688121718935, "grad_norm": 1.7861891984939575, "learning_rate": 5.540528561022352e-07, "loss": 0.7729, "step": 24157 }, { "epoch": 0.8533041339756015, "grad_norm": 2.5874974727630615, "learning_rate": 5.537911682294223e-07, "loss": 0.7583, "step": 24158 }, { "epoch": 0.8533394557793094, "grad_norm": 1.6503206491470337, "learning_rate": 5.535295385477557e-07, "loss": 0.7683, "step": 24159 }, { "epoch": 0.8533747775830173, "grad_norm": 1.683824896812439, "learning_rate": 5.532679670606577e-07, "loss": 0.7413, "step": 24160 }, { "epoch": 0.8534100993867252, "grad_norm": 1.830012559890747, "learning_rate": 5.530064537715524e-07, "loss": 0.7659, "step": 24161 }, { "epoch": 0.8534454211904331, "grad_norm": 1.601371169090271, "learning_rate": 5.527449986838629e-07, "loss": 0.7471, "step": 24162 }, { "epoch": 0.853480742994141, "grad_norm": 1.7268214225769043, "learning_rate": 5.524836018010121e-07, "loss": 0.7243, "step": 24163 }, { "epoch": 0.8535160647978489, "grad_norm": 2.201612710952759, "learning_rate": 5.522222631264185e-07, "loss": 0.8074, "step": 24164 }, { "epoch": 0.8535513866015568, "grad_norm": 1.640364408493042, "learning_rate": 5.519609826635042e-07, "loss": 0.769, "step": 24165 }, { "epoch": 0.8535867084052647, "grad_norm": 1.753854751586914, "learning_rate": 5.516997604156898e-07, "loss": 0.787, "step": 24166 }, { "epoch": 0.8536220302089726, "grad_norm": 3.802173614501953, "learning_rate": 5.514385963863916e-07, "loss": 0.7824, "step": 24167 }, { "epoch": 0.8536573520126806, "grad_norm": 1.694408655166626, "learning_rate": 5.511774905790279e-07, "loss": 0.7845, "step": 24168 }, { "epoch": 0.8536926738163885, "grad_norm": 1.5100468397140503, "learning_rate": 5.509164429970188e-07, "loss": 0.7606, "step": 24169 }, { "epoch": 0.8537279956200964, "grad_norm": 1.656670331954956, "learning_rate": 5.506554536437775e-07, "loss": 0.7513, "step": 24170 }, { "epoch": 0.8537633174238043, "grad_norm": 1.7990748882293701, "learning_rate": 5.503945225227209e-07, "loss": 0.7226, "step": 24171 }, { "epoch": 0.8537986392275122, "grad_norm": 1.7805423736572266, "learning_rate": 5.50133649637265e-07, "loss": 0.7846, "step": 24172 }, { "epoch": 0.8538339610312201, "grad_norm": 1.81497323513031, "learning_rate": 5.498728349908239e-07, "loss": 0.752, "step": 24173 }, { "epoch": 0.8538692828349279, "grad_norm": 1.732163906097412, "learning_rate": 5.496120785868098e-07, "loss": 0.7835, "step": 24174 }, { "epoch": 0.8539046046386358, "grad_norm": 1.616178035736084, "learning_rate": 5.493513804286366e-07, "loss": 0.7846, "step": 24175 }, { "epoch": 0.8539399264423437, "grad_norm": 1.6991428136825562, "learning_rate": 5.490907405197166e-07, "loss": 0.7454, "step": 24176 }, { "epoch": 0.8539752482460516, "grad_norm": 1.7569717168807983, "learning_rate": 5.488301588634592e-07, "loss": 0.7871, "step": 24177 }, { "epoch": 0.8540105700497596, "grad_norm": 1.706809639930725, "learning_rate": 5.485696354632758e-07, "loss": 0.7279, "step": 24178 }, { "epoch": 0.8540458918534675, "grad_norm": 1.7667856216430664, "learning_rate": 5.483091703225774e-07, "loss": 0.7859, "step": 24179 }, { "epoch": 0.8540812136571754, "grad_norm": 1.7093762159347534, "learning_rate": 5.480487634447712e-07, "loss": 0.7588, "step": 24180 }, { "epoch": 0.8541165354608833, "grad_norm": 1.6378875970840454, "learning_rate": 5.477884148332651e-07, "loss": 0.7642, "step": 24181 }, { "epoch": 0.8541518572645912, "grad_norm": 5.515681266784668, "learning_rate": 5.475281244914687e-07, "loss": 0.7462, "step": 24182 }, { "epoch": 0.8541871790682991, "grad_norm": 1.7832274436950684, "learning_rate": 5.472678924227865e-07, "loss": 0.7605, "step": 24183 }, { "epoch": 0.854222500872007, "grad_norm": 1.6393778324127197, "learning_rate": 5.47007718630626e-07, "loss": 0.7565, "step": 24184 }, { "epoch": 0.8542578226757149, "grad_norm": 1.7651209831237793, "learning_rate": 5.467476031183899e-07, "loss": 0.7508, "step": 24185 }, { "epoch": 0.8542931444794228, "grad_norm": 1.684499740600586, "learning_rate": 5.464875458894858e-07, "loss": 0.7629, "step": 24186 }, { "epoch": 0.8543284662831307, "grad_norm": 2.7832558155059814, "learning_rate": 5.462275469473138e-07, "loss": 0.761, "step": 24187 }, { "epoch": 0.8543637880868387, "grad_norm": 1.529882550239563, "learning_rate": 5.45967606295279e-07, "loss": 0.7504, "step": 24188 }, { "epoch": 0.8543991098905466, "grad_norm": 1.997098445892334, "learning_rate": 5.457077239367836e-07, "loss": 0.79, "step": 24189 }, { "epoch": 0.8544344316942545, "grad_norm": 1.8597339391708374, "learning_rate": 5.454478998752272e-07, "loss": 0.7806, "step": 24190 }, { "epoch": 0.8544697534979624, "grad_norm": 1.6810994148254395, "learning_rate": 5.451881341140114e-07, "loss": 0.7403, "step": 24191 }, { "epoch": 0.8545050753016703, "grad_norm": 1.6628212928771973, "learning_rate": 5.449284266565369e-07, "loss": 0.7567, "step": 24192 }, { "epoch": 0.8545403971053782, "grad_norm": 1.7802790403366089, "learning_rate": 5.44668777506201e-07, "loss": 0.7453, "step": 24193 }, { "epoch": 0.8545757189090861, "grad_norm": 1.7231502532958984, "learning_rate": 5.44409186666402e-07, "loss": 0.7585, "step": 24194 }, { "epoch": 0.854611040712794, "grad_norm": 1.875463604927063, "learning_rate": 5.441496541405394e-07, "loss": 0.7389, "step": 24195 }, { "epoch": 0.854646362516502, "grad_norm": 1.7647075653076172, "learning_rate": 5.438901799320079e-07, "loss": 0.7923, "step": 24196 }, { "epoch": 0.8546816843202099, "grad_norm": 1.814686894416809, "learning_rate": 5.436307640442034e-07, "loss": 0.7391, "step": 24197 }, { "epoch": 0.8547170061239178, "grad_norm": 1.676472783088684, "learning_rate": 5.433714064805228e-07, "loss": 0.7543, "step": 24198 }, { "epoch": 0.8547523279276257, "grad_norm": 1.9139481782913208, "learning_rate": 5.431121072443584e-07, "loss": 0.7693, "step": 24199 }, { "epoch": 0.8547876497313335, "grad_norm": 1.5956312417984009, "learning_rate": 5.428528663391052e-07, "loss": 0.7707, "step": 24200 }, { "epoch": 0.8548229715350414, "grad_norm": 1.9506056308746338, "learning_rate": 5.425936837681572e-07, "loss": 0.7427, "step": 24201 }, { "epoch": 0.8548582933387493, "grad_norm": 1.7470585107803345, "learning_rate": 5.423345595349039e-07, "loss": 0.7641, "step": 24202 }, { "epoch": 0.8548936151424572, "grad_norm": 1.6349029541015625, "learning_rate": 5.420754936427386e-07, "loss": 0.7523, "step": 24203 }, { "epoch": 0.8549289369461651, "grad_norm": 1.74413001537323, "learning_rate": 5.418164860950504e-07, "loss": 0.7558, "step": 24204 }, { "epoch": 0.854964258749873, "grad_norm": 1.6346423625946045, "learning_rate": 5.415575368952314e-07, "loss": 0.7777, "step": 24205 }, { "epoch": 0.8549995805535809, "grad_norm": 1.9941493272781372, "learning_rate": 5.412986460466679e-07, "loss": 0.7604, "step": 24206 }, { "epoch": 0.8550349023572889, "grad_norm": 1.6734813451766968, "learning_rate": 5.410398135527494e-07, "loss": 0.7543, "step": 24207 }, { "epoch": 0.8550702241609968, "grad_norm": 1.633952260017395, "learning_rate": 5.40781039416865e-07, "loss": 0.7703, "step": 24208 }, { "epoch": 0.8551055459647047, "grad_norm": 3.1712114810943604, "learning_rate": 5.405223236423984e-07, "loss": 0.787, "step": 24209 }, { "epoch": 0.8551408677684126, "grad_norm": 1.6962225437164307, "learning_rate": 5.402636662327381e-07, "loss": 0.7669, "step": 24210 }, { "epoch": 0.8551761895721205, "grad_norm": 1.5439789295196533, "learning_rate": 5.400050671912693e-07, "loss": 0.7624, "step": 24211 }, { "epoch": 0.8552115113758284, "grad_norm": 1.7477325201034546, "learning_rate": 5.397465265213747e-07, "loss": 0.7968, "step": 24212 }, { "epoch": 0.8552468331795363, "grad_norm": 1.9232641458511353, "learning_rate": 5.394880442264395e-07, "loss": 0.7448, "step": 24213 }, { "epoch": 0.8552821549832442, "grad_norm": 1.6580719947814941, "learning_rate": 5.392296203098468e-07, "loss": 0.796, "step": 24214 }, { "epoch": 0.8553174767869521, "grad_norm": 1.6655455827713013, "learning_rate": 5.389712547749776e-07, "loss": 0.7616, "step": 24215 }, { "epoch": 0.85535279859066, "grad_norm": 1.6055420637130737, "learning_rate": 5.38712947625214e-07, "loss": 0.7353, "step": 24216 }, { "epoch": 0.855388120394368, "grad_norm": 1.6673365831375122, "learning_rate": 5.384546988639377e-07, "loss": 0.7443, "step": 24217 }, { "epoch": 0.8554234421980759, "grad_norm": 1.9816964864730835, "learning_rate": 5.381965084945268e-07, "loss": 0.7746, "step": 24218 }, { "epoch": 0.8554587640017838, "grad_norm": 1.5933048725128174, "learning_rate": 5.379383765203611e-07, "loss": 0.7521, "step": 24219 }, { "epoch": 0.8554940858054917, "grad_norm": 1.5446813106536865, "learning_rate": 5.376803029448191e-07, "loss": 0.7613, "step": 24220 }, { "epoch": 0.8555294076091996, "grad_norm": 3.129812002182007, "learning_rate": 5.374222877712809e-07, "loss": 0.7635, "step": 24221 }, { "epoch": 0.8555647294129075, "grad_norm": 1.7072539329528809, "learning_rate": 5.371643310031183e-07, "loss": 0.7602, "step": 24222 }, { "epoch": 0.8556000512166154, "grad_norm": 1.7762768268585205, "learning_rate": 5.369064326437101e-07, "loss": 0.7513, "step": 24223 }, { "epoch": 0.8556353730203233, "grad_norm": 2.1529970169067383, "learning_rate": 5.366485926964327e-07, "loss": 0.7932, "step": 24224 }, { "epoch": 0.8556706948240312, "grad_norm": 1.6916382312774658, "learning_rate": 5.363908111646587e-07, "loss": 0.7795, "step": 24225 }, { "epoch": 0.855706016627739, "grad_norm": 1.7247692346572876, "learning_rate": 5.361330880517623e-07, "loss": 0.7772, "step": 24226 }, { "epoch": 0.855741338431447, "grad_norm": 1.6666635274887085, "learning_rate": 5.358754233611185e-07, "loss": 0.7689, "step": 24227 }, { "epoch": 0.8557766602351549, "grad_norm": 1.634723424911499, "learning_rate": 5.356178170960963e-07, "loss": 0.7431, "step": 24228 }, { "epoch": 0.8558119820388628, "grad_norm": 1.6924846172332764, "learning_rate": 5.353602692600696e-07, "loss": 0.7989, "step": 24229 }, { "epoch": 0.8558473038425707, "grad_norm": 1.7996925115585327, "learning_rate": 5.351027798564079e-07, "loss": 0.7486, "step": 24230 }, { "epoch": 0.8558826256462786, "grad_norm": 1.7152364253997803, "learning_rate": 5.34845348888483e-07, "loss": 0.768, "step": 24231 }, { "epoch": 0.8559179474499865, "grad_norm": 1.756861925125122, "learning_rate": 5.345879763596618e-07, "loss": 0.7907, "step": 24232 }, { "epoch": 0.8559532692536944, "grad_norm": 1.7579208612442017, "learning_rate": 5.343306622733136e-07, "loss": 0.763, "step": 24233 }, { "epoch": 0.8559885910574023, "grad_norm": 1.6996567249298096, "learning_rate": 5.340734066328069e-07, "loss": 0.7856, "step": 24234 }, { "epoch": 0.8560239128611102, "grad_norm": 2.077775716781616, "learning_rate": 5.338162094415073e-07, "loss": 0.7581, "step": 24235 }, { "epoch": 0.8560592346648181, "grad_norm": 2.365095376968384, "learning_rate": 5.335590707027816e-07, "loss": 0.763, "step": 24236 }, { "epoch": 0.8560945564685261, "grad_norm": 2.1053431034088135, "learning_rate": 5.333019904199965e-07, "loss": 0.7394, "step": 24237 }, { "epoch": 0.856129878272234, "grad_norm": 1.6486735343933105, "learning_rate": 5.330449685965139e-07, "loss": 0.7286, "step": 24238 }, { "epoch": 0.8561652000759419, "grad_norm": 1.7591803073883057, "learning_rate": 5.327880052357004e-07, "loss": 0.7523, "step": 24239 }, { "epoch": 0.8562005218796498, "grad_norm": 1.7496381998062134, "learning_rate": 5.325311003409162e-07, "loss": 0.7783, "step": 24240 }, { "epoch": 0.8562358436833577, "grad_norm": 1.806545615196228, "learning_rate": 5.322742539155267e-07, "loss": 0.7641, "step": 24241 }, { "epoch": 0.8562711654870656, "grad_norm": 1.7422289848327637, "learning_rate": 5.320174659628907e-07, "loss": 0.7629, "step": 24242 }, { "epoch": 0.8563064872907735, "grad_norm": 1.6822649240493774, "learning_rate": 5.317607364863703e-07, "loss": 0.785, "step": 24243 }, { "epoch": 0.8563418090944814, "grad_norm": 1.6757566928863525, "learning_rate": 5.315040654893261e-07, "loss": 0.78, "step": 24244 }, { "epoch": 0.8563771308981893, "grad_norm": 1.7775670289993286, "learning_rate": 5.31247452975116e-07, "loss": 0.7389, "step": 24245 }, { "epoch": 0.8564124527018973, "grad_norm": 1.632468342781067, "learning_rate": 5.309908989470991e-07, "loss": 0.7281, "step": 24246 }, { "epoch": 0.8564477745056052, "grad_norm": 1.5774500370025635, "learning_rate": 5.307344034086342e-07, "loss": 0.7592, "step": 24247 }, { "epoch": 0.8564830963093131, "grad_norm": 1.621463656425476, "learning_rate": 5.304779663630766e-07, "loss": 0.7717, "step": 24248 }, { "epoch": 0.856518418113021, "grad_norm": 1.691412329673767, "learning_rate": 5.302215878137834e-07, "loss": 0.7577, "step": 24249 }, { "epoch": 0.8565537399167289, "grad_norm": 1.793630838394165, "learning_rate": 5.299652677641104e-07, "loss": 0.7699, "step": 24250 }, { "epoch": 0.8565890617204368, "grad_norm": 1.6405662298202515, "learning_rate": 5.297090062174104e-07, "loss": 0.7633, "step": 24251 }, { "epoch": 0.8566243835241446, "grad_norm": 1.7157790660858154, "learning_rate": 5.294528031770391e-07, "loss": 0.7467, "step": 24252 }, { "epoch": 0.8566597053278525, "grad_norm": 2.0806808471679688, "learning_rate": 5.291966586463499e-07, "loss": 0.7738, "step": 24253 }, { "epoch": 0.8566950271315604, "grad_norm": 1.841143012046814, "learning_rate": 5.289405726286934e-07, "loss": 0.7575, "step": 24254 }, { "epoch": 0.8567303489352683, "grad_norm": 1.8090059757232666, "learning_rate": 5.286845451274225e-07, "loss": 0.7751, "step": 24255 }, { "epoch": 0.8567656707389762, "grad_norm": 2.7628753185272217, "learning_rate": 5.284285761458885e-07, "loss": 0.7508, "step": 24256 }, { "epoch": 0.8568009925426842, "grad_norm": 1.6463449001312256, "learning_rate": 5.281726656874398e-07, "loss": 0.7588, "step": 24257 }, { "epoch": 0.8568363143463921, "grad_norm": 2.5875329971313477, "learning_rate": 5.279168137554275e-07, "loss": 0.7663, "step": 24258 }, { "epoch": 0.8568716361501, "grad_norm": 1.8370091915130615, "learning_rate": 5.276610203531984e-07, "loss": 0.7715, "step": 24259 }, { "epoch": 0.8569069579538079, "grad_norm": 1.747304081916809, "learning_rate": 5.274052854841022e-07, "loss": 0.7543, "step": 24260 }, { "epoch": 0.8569422797575158, "grad_norm": 1.5254274606704712, "learning_rate": 5.271496091514839e-07, "loss": 0.7392, "step": 24261 }, { "epoch": 0.8569776015612237, "grad_norm": 2.7197728157043457, "learning_rate": 5.268939913586907e-07, "loss": 0.7566, "step": 24262 }, { "epoch": 0.8570129233649316, "grad_norm": 1.6500602960586548, "learning_rate": 5.26638432109069e-07, "loss": 0.7532, "step": 24263 }, { "epoch": 0.8570482451686395, "grad_norm": 1.796985149383545, "learning_rate": 5.263829314059621e-07, "loss": 0.7878, "step": 24264 }, { "epoch": 0.8570835669723474, "grad_norm": 1.657671570777893, "learning_rate": 5.261274892527136e-07, "loss": 0.7453, "step": 24265 }, { "epoch": 0.8571188887760554, "grad_norm": 1.644221544265747, "learning_rate": 5.258721056526689e-07, "loss": 0.7498, "step": 24266 }, { "epoch": 0.8571542105797633, "grad_norm": 1.6220788955688477, "learning_rate": 5.256167806091683e-07, "loss": 0.7278, "step": 24267 }, { "epoch": 0.8571895323834712, "grad_norm": 1.8448548316955566, "learning_rate": 5.253615141255536e-07, "loss": 0.7788, "step": 24268 }, { "epoch": 0.8572248541871791, "grad_norm": 1.649357795715332, "learning_rate": 5.251063062051681e-07, "loss": 0.7561, "step": 24269 }, { "epoch": 0.857260175990887, "grad_norm": 1.5629810094833374, "learning_rate": 5.248511568513482e-07, "loss": 0.7398, "step": 24270 }, { "epoch": 0.8572954977945949, "grad_norm": 1.7402113676071167, "learning_rate": 5.245960660674354e-07, "loss": 0.7789, "step": 24271 }, { "epoch": 0.8573308195983028, "grad_norm": 1.6306428909301758, "learning_rate": 5.243410338567695e-07, "loss": 0.7722, "step": 24272 }, { "epoch": 0.8573661414020107, "grad_norm": 2.1610524654388428, "learning_rate": 5.240860602226855e-07, "loss": 0.7453, "step": 24273 }, { "epoch": 0.8574014632057186, "grad_norm": 1.6922916173934937, "learning_rate": 5.238311451685213e-07, "loss": 0.7387, "step": 24274 }, { "epoch": 0.8574367850094265, "grad_norm": 1.027796745300293, "learning_rate": 5.235762886976153e-07, "loss": 0.5928, "step": 24275 }, { "epoch": 0.8574721068131345, "grad_norm": 1.7444581985473633, "learning_rate": 5.233214908133011e-07, "loss": 0.7636, "step": 24276 }, { "epoch": 0.8575074286168424, "grad_norm": 1.6006938219070435, "learning_rate": 5.230667515189125e-07, "loss": 0.7433, "step": 24277 }, { "epoch": 0.8575427504205502, "grad_norm": 1.7263884544372559, "learning_rate": 5.228120708177847e-07, "loss": 0.7304, "step": 24278 }, { "epoch": 0.8575780722242581, "grad_norm": 1.745762825012207, "learning_rate": 5.225574487132523e-07, "loss": 0.7764, "step": 24279 }, { "epoch": 0.857613394027966, "grad_norm": 1.7358038425445557, "learning_rate": 5.223028852086453e-07, "loss": 0.7609, "step": 24280 }, { "epoch": 0.8576487158316739, "grad_norm": 2.294308662414551, "learning_rate": 5.220483803072962e-07, "loss": 0.748, "step": 24281 }, { "epoch": 0.8576840376353818, "grad_norm": 1.8243385553359985, "learning_rate": 5.217939340125372e-07, "loss": 0.7284, "step": 24282 }, { "epoch": 0.8577193594390897, "grad_norm": 1.7177717685699463, "learning_rate": 5.215395463276962e-07, "loss": 0.7512, "step": 24283 }, { "epoch": 0.8577546812427976, "grad_norm": 1.6546003818511963, "learning_rate": 5.21285217256104e-07, "loss": 0.7899, "step": 24284 }, { "epoch": 0.8577900030465055, "grad_norm": 1.6474179029464722, "learning_rate": 5.210309468010899e-07, "loss": 0.775, "step": 24285 }, { "epoch": 0.8578253248502135, "grad_norm": 1.7673107385635376, "learning_rate": 5.207767349659798e-07, "loss": 0.7523, "step": 24286 }, { "epoch": 0.8578606466539214, "grad_norm": 1.546406626701355, "learning_rate": 5.205225817541021e-07, "loss": 0.7421, "step": 24287 }, { "epoch": 0.8578959684576293, "grad_norm": 1.8254238367080688, "learning_rate": 5.202684871687824e-07, "loss": 0.7681, "step": 24288 }, { "epoch": 0.8579312902613372, "grad_norm": 1.6503524780273438, "learning_rate": 5.200144512133476e-07, "loss": 0.7639, "step": 24289 }, { "epoch": 0.8579666120650451, "grad_norm": 1.6014246940612793, "learning_rate": 5.197604738911205e-07, "loss": 0.759, "step": 24290 }, { "epoch": 0.858001933868753, "grad_norm": 1.809740662574768, "learning_rate": 5.195065552054263e-07, "loss": 0.7967, "step": 24291 }, { "epoch": 0.8580372556724609, "grad_norm": 1.7203032970428467, "learning_rate": 5.192526951595888e-07, "loss": 0.7799, "step": 24292 }, { "epoch": 0.8580725774761688, "grad_norm": 1.626752495765686, "learning_rate": 5.189988937569295e-07, "loss": 0.7366, "step": 24293 }, { "epoch": 0.8581078992798767, "grad_norm": 1.6092712879180908, "learning_rate": 5.187451510007696e-07, "loss": 0.7344, "step": 24294 }, { "epoch": 0.8581432210835847, "grad_norm": 1.8049473762512207, "learning_rate": 5.18491466894433e-07, "loss": 0.7517, "step": 24295 }, { "epoch": 0.8581785428872926, "grad_norm": 1.75448477268219, "learning_rate": 5.182378414412359e-07, "loss": 0.8173, "step": 24296 }, { "epoch": 0.8582138646910005, "grad_norm": 1.6479209661483765, "learning_rate": 5.179842746444991e-07, "loss": 0.7411, "step": 24297 }, { "epoch": 0.8582491864947084, "grad_norm": 1.6801018714904785, "learning_rate": 5.177307665075416e-07, "loss": 0.7863, "step": 24298 }, { "epoch": 0.8582845082984163, "grad_norm": 1.6894503831863403, "learning_rate": 5.174773170336822e-07, "loss": 0.763, "step": 24299 }, { "epoch": 0.8583198301021242, "grad_norm": 2.4013407230377197, "learning_rate": 5.172239262262363e-07, "loss": 0.7692, "step": 24300 }, { "epoch": 0.8583551519058321, "grad_norm": 1.8218498229980469, "learning_rate": 5.169705940885211e-07, "loss": 0.7668, "step": 24301 }, { "epoch": 0.85839047370954, "grad_norm": 1.7304548025131226, "learning_rate": 5.167173206238529e-07, "loss": 0.7782, "step": 24302 }, { "epoch": 0.8584257955132479, "grad_norm": 2.0214345455169678, "learning_rate": 5.164641058355446e-07, "loss": 0.7283, "step": 24303 }, { "epoch": 0.8584611173169557, "grad_norm": 1.9324520826339722, "learning_rate": 5.162109497269108e-07, "loss": 0.7655, "step": 24304 }, { "epoch": 0.8584964391206636, "grad_norm": 4.030216217041016, "learning_rate": 5.159578523012671e-07, "loss": 0.712, "step": 24305 }, { "epoch": 0.8585317609243716, "grad_norm": 1.6864898204803467, "learning_rate": 5.157048135619225e-07, "loss": 0.7767, "step": 24306 }, { "epoch": 0.8585670827280795, "grad_norm": 1.9063801765441895, "learning_rate": 5.154518335121905e-07, "loss": 0.7588, "step": 24307 }, { "epoch": 0.8586024045317874, "grad_norm": 1.770664930343628, "learning_rate": 5.151989121553829e-07, "loss": 0.7737, "step": 24308 }, { "epoch": 0.8586377263354953, "grad_norm": 2.5070009231567383, "learning_rate": 5.149460494948083e-07, "loss": 0.7781, "step": 24309 }, { "epoch": 0.8586730481392032, "grad_norm": 1.6501755714416504, "learning_rate": 5.146932455337761e-07, "loss": 0.7533, "step": 24310 }, { "epoch": 0.8587083699429111, "grad_norm": 1.6970462799072266, "learning_rate": 5.144405002755969e-07, "loss": 0.7698, "step": 24311 }, { "epoch": 0.858743691746619, "grad_norm": 1.68373703956604, "learning_rate": 5.141878137235761e-07, "loss": 0.7582, "step": 24312 }, { "epoch": 0.8587790135503269, "grad_norm": 1.6144684553146362, "learning_rate": 5.139351858810227e-07, "loss": 0.7587, "step": 24313 }, { "epoch": 0.8588143353540348, "grad_norm": 1.7730000019073486, "learning_rate": 5.136826167512416e-07, "loss": 0.7586, "step": 24314 }, { "epoch": 0.8588496571577428, "grad_norm": 1.9075989723205566, "learning_rate": 5.134301063375402e-07, "loss": 0.7761, "step": 24315 }, { "epoch": 0.8588849789614507, "grad_norm": 1.6879827976226807, "learning_rate": 5.13177654643221e-07, "loss": 0.7952, "step": 24316 }, { "epoch": 0.8589203007651586, "grad_norm": 1.989713191986084, "learning_rate": 5.129252616715896e-07, "loss": 0.7513, "step": 24317 }, { "epoch": 0.8589556225688665, "grad_norm": 2.0079007148742676, "learning_rate": 5.126729274259495e-07, "loss": 0.7614, "step": 24318 }, { "epoch": 0.8589909443725744, "grad_norm": 2.2247262001037598, "learning_rate": 5.124206519096015e-07, "loss": 0.7765, "step": 24319 }, { "epoch": 0.8590262661762823, "grad_norm": 1.8169721364974976, "learning_rate": 5.121684351258488e-07, "loss": 0.7282, "step": 24320 }, { "epoch": 0.8590615879799902, "grad_norm": 1.6397019624710083, "learning_rate": 5.119162770779929e-07, "loss": 0.7804, "step": 24321 }, { "epoch": 0.8590969097836981, "grad_norm": 1.6315568685531616, "learning_rate": 5.116641777693315e-07, "loss": 0.7653, "step": 24322 }, { "epoch": 0.859132231587406, "grad_norm": 1.5910738706588745, "learning_rate": 5.114121372031666e-07, "loss": 0.8231, "step": 24323 }, { "epoch": 0.859167553391114, "grad_norm": 1.605545997619629, "learning_rate": 5.111601553827961e-07, "loss": 0.7634, "step": 24324 }, { "epoch": 0.8592028751948219, "grad_norm": 1.6520936489105225, "learning_rate": 5.109082323115172e-07, "loss": 0.7397, "step": 24325 }, { "epoch": 0.8592381969985298, "grad_norm": 1.4957027435302734, "learning_rate": 5.106563679926274e-07, "loss": 0.7442, "step": 24326 }, { "epoch": 0.8592735188022377, "grad_norm": 1.8823715448379517, "learning_rate": 5.10404562429424e-07, "loss": 0.7711, "step": 24327 }, { "epoch": 0.8593088406059456, "grad_norm": 1.7650612592697144, "learning_rate": 5.101528156252006e-07, "loss": 0.7851, "step": 24328 }, { "epoch": 0.8593441624096535, "grad_norm": 1.7600401639938354, "learning_rate": 5.099011275832533e-07, "loss": 0.7512, "step": 24329 }, { "epoch": 0.8593794842133613, "grad_norm": 1.7848211526870728, "learning_rate": 5.096494983068772e-07, "loss": 0.7629, "step": 24330 }, { "epoch": 0.8594148060170692, "grad_norm": 1.6252367496490479, "learning_rate": 5.093979277993632e-07, "loss": 0.7709, "step": 24331 }, { "epoch": 0.8594501278207771, "grad_norm": 1.8046464920043945, "learning_rate": 5.091464160640059e-07, "loss": 0.766, "step": 24332 }, { "epoch": 0.859485449624485, "grad_norm": 1.5648280382156372, "learning_rate": 5.088949631040951e-07, "loss": 0.7853, "step": 24333 }, { "epoch": 0.8595207714281929, "grad_norm": 1.6372236013412476, "learning_rate": 5.086435689229241e-07, "loss": 0.7131, "step": 24334 }, { "epoch": 0.8595560932319009, "grad_norm": 1.6532936096191406, "learning_rate": 5.083922335237807e-07, "loss": 0.7293, "step": 24335 }, { "epoch": 0.8595914150356088, "grad_norm": 1.7931028604507446, "learning_rate": 5.081409569099549e-07, "loss": 0.7835, "step": 24336 }, { "epoch": 0.8596267368393167, "grad_norm": 1.6545485258102417, "learning_rate": 5.078897390847376e-07, "loss": 0.7723, "step": 24337 }, { "epoch": 0.8596620586430246, "grad_norm": 1.6239429712295532, "learning_rate": 5.076385800514133e-07, "loss": 0.7593, "step": 24338 }, { "epoch": 0.8596973804467325, "grad_norm": 1.5310661792755127, "learning_rate": 5.073874798132711e-07, "loss": 0.7921, "step": 24339 }, { "epoch": 0.8597327022504404, "grad_norm": 1.6495561599731445, "learning_rate": 5.071364383735983e-07, "loss": 0.7417, "step": 24340 }, { "epoch": 0.8597680240541483, "grad_norm": 1.8244600296020508, "learning_rate": 5.068854557356778e-07, "loss": 0.7858, "step": 24341 }, { "epoch": 0.8598033458578562, "grad_norm": 1.557658314704895, "learning_rate": 5.06634531902796e-07, "loss": 0.7627, "step": 24342 }, { "epoch": 0.8598386676615641, "grad_norm": 1.823698878288269, "learning_rate": 5.063836668782379e-07, "loss": 0.8014, "step": 24343 }, { "epoch": 0.859873989465272, "grad_norm": 1.7047278881072998, "learning_rate": 5.061328606652849e-07, "loss": 0.7535, "step": 24344 }, { "epoch": 0.85990931126898, "grad_norm": 1.6187514066696167, "learning_rate": 5.058821132672199e-07, "loss": 0.7479, "step": 24345 }, { "epoch": 0.8599446330726879, "grad_norm": 3.2172811031341553, "learning_rate": 5.05631424687325e-07, "loss": 0.7988, "step": 24346 }, { "epoch": 0.8599799548763958, "grad_norm": 1.6546077728271484, "learning_rate": 5.053807949288819e-07, "loss": 0.7469, "step": 24347 }, { "epoch": 0.8600152766801037, "grad_norm": 1.8969082832336426, "learning_rate": 5.051302239951695e-07, "loss": 0.7489, "step": 24348 }, { "epoch": 0.8600505984838116, "grad_norm": 1.6629693508148193, "learning_rate": 5.048797118894677e-07, "loss": 0.7738, "step": 24349 }, { "epoch": 0.8600859202875195, "grad_norm": 1.8749005794525146, "learning_rate": 5.046292586150576e-07, "loss": 0.7961, "step": 24350 }, { "epoch": 0.8601212420912274, "grad_norm": 1.5971773862838745, "learning_rate": 5.043788641752117e-07, "loss": 0.73, "step": 24351 }, { "epoch": 0.8601565638949353, "grad_norm": 1.9347491264343262, "learning_rate": 5.041285285732111e-07, "loss": 0.7349, "step": 24352 }, { "epoch": 0.8601918856986432, "grad_norm": 1.8067713975906372, "learning_rate": 5.03878251812332e-07, "loss": 0.733, "step": 24353 }, { "epoch": 0.8602272075023512, "grad_norm": 1.8176720142364502, "learning_rate": 5.036280338958476e-07, "loss": 0.7688, "step": 24354 }, { "epoch": 0.8602625293060591, "grad_norm": 1.7797762155532837, "learning_rate": 5.033778748270352e-07, "loss": 0.7182, "step": 24355 }, { "epoch": 0.860297851109767, "grad_norm": 1.8204106092453003, "learning_rate": 5.031277746091673e-07, "loss": 0.7377, "step": 24356 }, { "epoch": 0.8603331729134748, "grad_norm": 1.629992127418518, "learning_rate": 5.028777332455187e-07, "loss": 0.7854, "step": 24357 }, { "epoch": 0.8603684947171827, "grad_norm": 1.739898681640625, "learning_rate": 5.026277507393601e-07, "loss": 0.7829, "step": 24358 }, { "epoch": 0.8604038165208906, "grad_norm": 1.7752180099487305, "learning_rate": 5.023778270939644e-07, "loss": 0.7461, "step": 24359 }, { "epoch": 0.8604391383245985, "grad_norm": 1.7683742046356201, "learning_rate": 5.021279623126029e-07, "loss": 0.7282, "step": 24360 }, { "epoch": 0.8604744601283064, "grad_norm": 1.6095048189163208, "learning_rate": 5.018781563985442e-07, "loss": 0.7595, "step": 24361 }, { "epoch": 0.8605097819320143, "grad_norm": 1.63233482837677, "learning_rate": 5.016284093550583e-07, "loss": 0.8011, "step": 24362 }, { "epoch": 0.8605451037357222, "grad_norm": 1.8756481409072876, "learning_rate": 5.013787211854154e-07, "loss": 0.7647, "step": 24363 }, { "epoch": 0.8605804255394301, "grad_norm": 1.595621943473816, "learning_rate": 5.011290918928813e-07, "loss": 0.763, "step": 24364 }, { "epoch": 0.8606157473431381, "grad_norm": 2.118710994720459, "learning_rate": 5.008795214807238e-07, "loss": 0.7567, "step": 24365 }, { "epoch": 0.860651069146846, "grad_norm": 1.821195125579834, "learning_rate": 5.006300099522104e-07, "loss": 0.7599, "step": 24366 }, { "epoch": 0.8606863909505539, "grad_norm": 1.8430149555206299, "learning_rate": 5.003805573106052e-07, "loss": 0.7296, "step": 24367 }, { "epoch": 0.8607217127542618, "grad_norm": 1.5698387622833252, "learning_rate": 5.001311635591727e-07, "loss": 0.7443, "step": 24368 }, { "epoch": 0.8607570345579697, "grad_norm": 1.632299780845642, "learning_rate": 4.998818287011787e-07, "loss": 0.7308, "step": 24369 }, { "epoch": 0.8607923563616776, "grad_norm": 1.739646077156067, "learning_rate": 4.996325527398854e-07, "loss": 0.7774, "step": 24370 }, { "epoch": 0.8608276781653855, "grad_norm": 0.9995728731155396, "learning_rate": 4.993833356785549e-07, "loss": 0.5989, "step": 24371 }, { "epoch": 0.8608629999690934, "grad_norm": 1.944847583770752, "learning_rate": 4.991341775204489e-07, "loss": 0.7992, "step": 24372 }, { "epoch": 0.8608983217728013, "grad_norm": 1.7364742755889893, "learning_rate": 4.988850782688293e-07, "loss": 0.7358, "step": 24373 }, { "epoch": 0.8609336435765093, "grad_norm": 1.601896047592163, "learning_rate": 4.98636037926955e-07, "loss": 0.7351, "step": 24374 }, { "epoch": 0.8609689653802172, "grad_norm": 1.97445809841156, "learning_rate": 4.983870564980864e-07, "loss": 0.8042, "step": 24375 }, { "epoch": 0.8610042871839251, "grad_norm": 1.5413132905960083, "learning_rate": 4.981381339854824e-07, "loss": 0.7582, "step": 24376 }, { "epoch": 0.861039608987633, "grad_norm": 1.907129168510437, "learning_rate": 4.978892703923993e-07, "loss": 0.7768, "step": 24377 }, { "epoch": 0.8610749307913409, "grad_norm": 1.753427267074585, "learning_rate": 4.976404657220951e-07, "loss": 0.753, "step": 24378 }, { "epoch": 0.8611102525950488, "grad_norm": 2.3273370265960693, "learning_rate": 4.973917199778272e-07, "loss": 0.7905, "step": 24379 }, { "epoch": 0.8611455743987567, "grad_norm": 1.6844359636306763, "learning_rate": 4.971430331628491e-07, "loss": 0.7643, "step": 24380 }, { "epoch": 0.8611808962024646, "grad_norm": 1.854211688041687, "learning_rate": 4.968944052804164e-07, "loss": 0.7627, "step": 24381 }, { "epoch": 0.8612162180061725, "grad_norm": 1.5531080961227417, "learning_rate": 4.966458363337845e-07, "loss": 0.769, "step": 24382 }, { "epoch": 0.8612515398098803, "grad_norm": 1.7904728651046753, "learning_rate": 4.96397326326204e-07, "loss": 0.7723, "step": 24383 }, { "epoch": 0.8612868616135883, "grad_norm": 1.6267642974853516, "learning_rate": 4.961488752609289e-07, "loss": 0.7681, "step": 24384 }, { "epoch": 0.8613221834172962, "grad_norm": 1.5527632236480713, "learning_rate": 4.959004831412118e-07, "loss": 0.7775, "step": 24385 }, { "epoch": 0.8613575052210041, "grad_norm": 1.4829310178756714, "learning_rate": 4.956521499703016e-07, "loss": 0.7158, "step": 24386 }, { "epoch": 0.861392827024712, "grad_norm": 2.183385133743286, "learning_rate": 4.954038757514495e-07, "loss": 0.7714, "step": 24387 }, { "epoch": 0.8614281488284199, "grad_norm": 1.9372276067733765, "learning_rate": 4.951556604879049e-07, "loss": 0.7934, "step": 24388 }, { "epoch": 0.8614634706321278, "grad_norm": 2.9316582679748535, "learning_rate": 4.949075041829166e-07, "loss": 0.8, "step": 24389 }, { "epoch": 0.8614987924358357, "grad_norm": 1.7602556943893433, "learning_rate": 4.946594068397309e-07, "loss": 0.7945, "step": 24390 }, { "epoch": 0.8615341142395436, "grad_norm": 1.751742959022522, "learning_rate": 4.944113684615958e-07, "loss": 0.773, "step": 24391 }, { "epoch": 0.8615694360432515, "grad_norm": 1.6455130577087402, "learning_rate": 4.941633890517595e-07, "loss": 0.7704, "step": 24392 }, { "epoch": 0.8616047578469594, "grad_norm": 1.6818221807479858, "learning_rate": 4.93915468613464e-07, "loss": 0.7691, "step": 24393 }, { "epoch": 0.8616400796506674, "grad_norm": 1.7474315166473389, "learning_rate": 4.936676071499563e-07, "loss": 0.8304, "step": 24394 }, { "epoch": 0.8616754014543753, "grad_norm": 1.887149453163147, "learning_rate": 4.934198046644806e-07, "loss": 0.7342, "step": 24395 }, { "epoch": 0.8617107232580832, "grad_norm": 1.5678623914718628, "learning_rate": 4.931720611602786e-07, "loss": 0.7697, "step": 24396 }, { "epoch": 0.8617460450617911, "grad_norm": 1.814221739768982, "learning_rate": 4.929243766405933e-07, "loss": 0.7685, "step": 24397 }, { "epoch": 0.861781366865499, "grad_norm": 1.7248806953430176, "learning_rate": 4.926767511086672e-07, "loss": 0.7491, "step": 24398 }, { "epoch": 0.8618166886692069, "grad_norm": 1.6307120323181152, "learning_rate": 4.924291845677398e-07, "loss": 0.7534, "step": 24399 }, { "epoch": 0.8618520104729148, "grad_norm": 2.020333766937256, "learning_rate": 4.921816770210519e-07, "loss": 0.7535, "step": 24400 }, { "epoch": 0.8618873322766227, "grad_norm": 1.798793911933899, "learning_rate": 4.919342284718442e-07, "loss": 0.7661, "step": 24401 }, { "epoch": 0.8619226540803306, "grad_norm": 1.833305835723877, "learning_rate": 4.916868389233525e-07, "loss": 0.7697, "step": 24402 }, { "epoch": 0.8619579758840386, "grad_norm": 1.7260410785675049, "learning_rate": 4.914395083788159e-07, "loss": 0.7549, "step": 24403 }, { "epoch": 0.8619932976877465, "grad_norm": 0.9527677297592163, "learning_rate": 4.911922368414718e-07, "loss": 0.5774, "step": 24404 }, { "epoch": 0.8620286194914544, "grad_norm": 1.6340473890304565, "learning_rate": 4.909450243145569e-07, "loss": 0.7288, "step": 24405 }, { "epoch": 0.8620639412951623, "grad_norm": 1.6795201301574707, "learning_rate": 4.90697870801306e-07, "loss": 0.7629, "step": 24406 }, { "epoch": 0.8620992630988702, "grad_norm": 1.693257212638855, "learning_rate": 4.904507763049527e-07, "loss": 0.7674, "step": 24407 }, { "epoch": 0.8621345849025781, "grad_norm": 1.803052544593811, "learning_rate": 4.90203740828733e-07, "loss": 0.7742, "step": 24408 }, { "epoch": 0.8621699067062859, "grad_norm": 1.5650615692138672, "learning_rate": 4.899567643758779e-07, "loss": 0.7573, "step": 24409 }, { "epoch": 0.8622052285099938, "grad_norm": 1.6031638383865356, "learning_rate": 4.897098469496209e-07, "loss": 0.744, "step": 24410 }, { "epoch": 0.8622405503137017, "grad_norm": 1.8314063549041748, "learning_rate": 4.894629885531943e-07, "loss": 0.8011, "step": 24411 }, { "epoch": 0.8622758721174096, "grad_norm": 1.7573344707489014, "learning_rate": 4.892161891898278e-07, "loss": 0.7622, "step": 24412 }, { "epoch": 0.8623111939211175, "grad_norm": 1.579533576965332, "learning_rate": 4.889694488627512e-07, "loss": 0.7698, "step": 24413 }, { "epoch": 0.8623465157248255, "grad_norm": 1.7444024085998535, "learning_rate": 4.887227675751949e-07, "loss": 0.7471, "step": 24414 }, { "epoch": 0.8623818375285334, "grad_norm": 1.731958031654358, "learning_rate": 4.884761453303882e-07, "loss": 0.7713, "step": 24415 }, { "epoch": 0.8624171593322413, "grad_norm": 1.639779806137085, "learning_rate": 4.882295821315564e-07, "loss": 0.7604, "step": 24416 }, { "epoch": 0.8624524811359492, "grad_norm": 1.6396108865737915, "learning_rate": 4.879830779819278e-07, "loss": 0.7522, "step": 24417 }, { "epoch": 0.8624878029396571, "grad_norm": 1.679405689239502, "learning_rate": 4.877366328847288e-07, "loss": 0.7381, "step": 24418 }, { "epoch": 0.862523124743365, "grad_norm": 1.774450659751892, "learning_rate": 4.874902468431846e-07, "loss": 0.7524, "step": 24419 }, { "epoch": 0.8625584465470729, "grad_norm": 1.7598716020584106, "learning_rate": 4.872439198605189e-07, "loss": 0.7498, "step": 24420 }, { "epoch": 0.8625937683507808, "grad_norm": 3.090973138809204, "learning_rate": 4.869976519399583e-07, "loss": 0.7678, "step": 24421 }, { "epoch": 0.8626290901544887, "grad_norm": 1.7205249071121216, "learning_rate": 4.867514430847225e-07, "loss": 0.7736, "step": 24422 }, { "epoch": 0.8626644119581967, "grad_norm": 1.7678800821304321, "learning_rate": 4.865052932980358e-07, "loss": 0.7462, "step": 24423 }, { "epoch": 0.8626997337619046, "grad_norm": 1.6959425210952759, "learning_rate": 4.862592025831197e-07, "loss": 0.771, "step": 24424 }, { "epoch": 0.8627350555656125, "grad_norm": 1.6812756061553955, "learning_rate": 4.860131709431953e-07, "loss": 0.7583, "step": 24425 }, { "epoch": 0.8627703773693204, "grad_norm": 1.7710936069488525, "learning_rate": 4.85767198381481e-07, "loss": 0.7873, "step": 24426 }, { "epoch": 0.8628056991730283, "grad_norm": 1.6649125814437866, "learning_rate": 4.855212849011964e-07, "loss": 0.7706, "step": 24427 }, { "epoch": 0.8628410209767362, "grad_norm": 1.612605094909668, "learning_rate": 4.852754305055624e-07, "loss": 0.7687, "step": 24428 }, { "epoch": 0.8628763427804441, "grad_norm": 1.728048324584961, "learning_rate": 4.850296351977935e-07, "loss": 0.771, "step": 24429 }, { "epoch": 0.862911664584152, "grad_norm": 1.8155875205993652, "learning_rate": 4.847838989811082e-07, "loss": 0.74, "step": 24430 }, { "epoch": 0.8629469863878599, "grad_norm": 1.6431822776794434, "learning_rate": 4.84538221858723e-07, "loss": 0.7922, "step": 24431 }, { "epoch": 0.8629823081915678, "grad_norm": 1.9854177236557007, "learning_rate": 4.842926038338519e-07, "loss": 0.8039, "step": 24432 }, { "epoch": 0.8630176299952758, "grad_norm": 1.7986210584640503, "learning_rate": 4.840470449097107e-07, "loss": 0.7641, "step": 24433 }, { "epoch": 0.8630529517989837, "grad_norm": 1.7036077976226807, "learning_rate": 4.838015450895134e-07, "loss": 0.7824, "step": 24434 }, { "epoch": 0.8630882736026915, "grad_norm": 1.6802480220794678, "learning_rate": 4.83556104376472e-07, "loss": 0.7835, "step": 24435 }, { "epoch": 0.8631235954063994, "grad_norm": 1.8403605222702026, "learning_rate": 4.833107227737994e-07, "loss": 0.7405, "step": 24436 }, { "epoch": 0.8631589172101073, "grad_norm": 1.5552761554718018, "learning_rate": 4.830654002847074e-07, "loss": 0.7595, "step": 24437 }, { "epoch": 0.8631942390138152, "grad_norm": 1.6325404644012451, "learning_rate": 4.828201369124058e-07, "loss": 0.7612, "step": 24438 }, { "epoch": 0.8632295608175231, "grad_norm": 1.6971133947372437, "learning_rate": 4.825749326601048e-07, "loss": 0.7654, "step": 24439 }, { "epoch": 0.863264882621231, "grad_norm": 1.745081901550293, "learning_rate": 4.823297875310151e-07, "loss": 0.7734, "step": 24440 }, { "epoch": 0.8633002044249389, "grad_norm": 1.7578256130218506, "learning_rate": 4.820847015283425e-07, "loss": 0.7317, "step": 24441 }, { "epoch": 0.8633355262286468, "grad_norm": 1.9176172018051147, "learning_rate": 4.818396746552967e-07, "loss": 0.7575, "step": 24442 }, { "epoch": 0.8633708480323548, "grad_norm": 1.7510136365890503, "learning_rate": 4.815947069150845e-07, "loss": 0.8053, "step": 24443 }, { "epoch": 0.8634061698360627, "grad_norm": 1.7470624446868896, "learning_rate": 4.813497983109117e-07, "loss": 0.8004, "step": 24444 }, { "epoch": 0.8634414916397706, "grad_norm": 1.958941102027893, "learning_rate": 4.811049488459818e-07, "loss": 0.7882, "step": 24445 }, { "epoch": 0.8634768134434785, "grad_norm": 1.6319284439086914, "learning_rate": 4.808601585235017e-07, "loss": 0.7796, "step": 24446 }, { "epoch": 0.8635121352471864, "grad_norm": 1.6805412769317627, "learning_rate": 4.806154273466746e-07, "loss": 0.7729, "step": 24447 }, { "epoch": 0.8635474570508943, "grad_norm": 1.6981621980667114, "learning_rate": 4.803707553187026e-07, "loss": 0.7719, "step": 24448 }, { "epoch": 0.8635827788546022, "grad_norm": 1.7267674207687378, "learning_rate": 4.801261424427883e-07, "loss": 0.7335, "step": 24449 }, { "epoch": 0.8636181006583101, "grad_norm": 1.9928805828094482, "learning_rate": 4.798815887221347e-07, "loss": 0.7278, "step": 24450 }, { "epoch": 0.863653422462018, "grad_norm": 1.8254530429840088, "learning_rate": 4.796370941599399e-07, "loss": 0.7879, "step": 24451 }, { "epoch": 0.863688744265726, "grad_norm": 1.9745506048202515, "learning_rate": 4.793926587594055e-07, "loss": 0.7833, "step": 24452 }, { "epoch": 0.8637240660694339, "grad_norm": 1.6707911491394043, "learning_rate": 4.791482825237309e-07, "loss": 0.7794, "step": 24453 }, { "epoch": 0.8637593878731418, "grad_norm": 1.644031286239624, "learning_rate": 4.789039654561128e-07, "loss": 0.7375, "step": 24454 }, { "epoch": 0.8637947096768497, "grad_norm": 1.7742763757705688, "learning_rate": 4.786597075597499e-07, "loss": 0.7678, "step": 24455 }, { "epoch": 0.8638300314805576, "grad_norm": 1.6114789247512817, "learning_rate": 4.784155088378395e-07, "loss": 0.7818, "step": 24456 }, { "epoch": 0.8638653532842655, "grad_norm": 2.526439666748047, "learning_rate": 4.781713692935757e-07, "loss": 0.773, "step": 24457 }, { "epoch": 0.8639006750879734, "grad_norm": 1.7899103164672852, "learning_rate": 4.779272889301556e-07, "loss": 0.7604, "step": 24458 }, { "epoch": 0.8639359968916813, "grad_norm": 1.8800630569458008, "learning_rate": 4.776832677507737e-07, "loss": 0.7706, "step": 24459 }, { "epoch": 0.8639713186953892, "grad_norm": 1.5926588773727417, "learning_rate": 4.774393057586224e-07, "loss": 0.7771, "step": 24460 }, { "epoch": 0.864006640499097, "grad_norm": 1.7879232168197632, "learning_rate": 4.771954029568965e-07, "loss": 0.74, "step": 24461 }, { "epoch": 0.864041962302805, "grad_norm": 0.9230734705924988, "learning_rate": 4.769515593487856e-07, "loss": 0.5782, "step": 24462 }, { "epoch": 0.8640772841065129, "grad_norm": 1.6870752573013306, "learning_rate": 4.767077749374832e-07, "loss": 0.7682, "step": 24463 }, { "epoch": 0.8641126059102208, "grad_norm": 1.8391077518463135, "learning_rate": 4.7646404972617854e-07, "loss": 0.7362, "step": 24464 }, { "epoch": 0.8641479277139287, "grad_norm": 1.6869560480117798, "learning_rate": 4.762203837180618e-07, "loss": 0.7503, "step": 24465 }, { "epoch": 0.8641832495176366, "grad_norm": 1.7880874872207642, "learning_rate": 4.759767769163237e-07, "loss": 0.7639, "step": 24466 }, { "epoch": 0.8642185713213445, "grad_norm": 1.6791752576828003, "learning_rate": 4.7573322932415014e-07, "loss": 0.7544, "step": 24467 }, { "epoch": 0.8642538931250524, "grad_norm": 1.7965974807739258, "learning_rate": 4.75489740944729e-07, "loss": 0.7685, "step": 24468 }, { "epoch": 0.8642892149287603, "grad_norm": 1.787480115890503, "learning_rate": 4.752463117812489e-07, "loss": 0.7537, "step": 24469 }, { "epoch": 0.8643245367324682, "grad_norm": 1.6726181507110596, "learning_rate": 4.75002941836894e-07, "loss": 0.7463, "step": 24470 }, { "epoch": 0.8643598585361761, "grad_norm": 1.7076150178909302, "learning_rate": 4.7475963111484944e-07, "loss": 0.7542, "step": 24471 }, { "epoch": 0.864395180339884, "grad_norm": 1.9662312269210815, "learning_rate": 4.745163796183005e-07, "loss": 0.7647, "step": 24472 }, { "epoch": 0.864430502143592, "grad_norm": 1.5773987770080566, "learning_rate": 4.742731873504314e-07, "loss": 0.7647, "step": 24473 }, { "epoch": 0.8644658239472999, "grad_norm": 1.772332787513733, "learning_rate": 4.740300543144227e-07, "loss": 0.7506, "step": 24474 }, { "epoch": 0.8645011457510078, "grad_norm": 1.7478104829788208, "learning_rate": 4.7378698051345815e-07, "loss": 0.7804, "step": 24475 }, { "epoch": 0.8645364675547157, "grad_norm": 1.6846767663955688, "learning_rate": 4.735439659507196e-07, "loss": 0.7496, "step": 24476 }, { "epoch": 0.8645717893584236, "grad_norm": 1.7033463716506958, "learning_rate": 4.733010106293862e-07, "loss": 0.7442, "step": 24477 }, { "epoch": 0.8646071111621315, "grad_norm": 1.6864681243896484, "learning_rate": 4.7305811455263803e-07, "loss": 0.7547, "step": 24478 }, { "epoch": 0.8646424329658394, "grad_norm": 1.5301542282104492, "learning_rate": 4.7281527772365496e-07, "loss": 0.7635, "step": 24479 }, { "epoch": 0.8646777547695473, "grad_norm": 1.7461612224578857, "learning_rate": 4.7257250014561493e-07, "loss": 0.7495, "step": 24480 }, { "epoch": 0.8647130765732552, "grad_norm": 1.6681241989135742, "learning_rate": 4.7232978182169374e-07, "loss": 0.7434, "step": 24481 }, { "epoch": 0.8647483983769632, "grad_norm": 1.7546428442001343, "learning_rate": 4.720871227550694e-07, "loss": 0.7857, "step": 24482 }, { "epoch": 0.8647837201806711, "grad_norm": 1.8864110708236694, "learning_rate": 4.7184452294891827e-07, "loss": 0.7682, "step": 24483 }, { "epoch": 0.864819041984379, "grad_norm": 1.7134455442428589, "learning_rate": 4.716019824064144e-07, "loss": 0.7569, "step": 24484 }, { "epoch": 0.8648543637880869, "grad_norm": 1.6045078039169312, "learning_rate": 4.71359501130732e-07, "loss": 0.7446, "step": 24485 }, { "epoch": 0.8648896855917948, "grad_norm": 1.5852850675582886, "learning_rate": 4.7111707912504635e-07, "loss": 0.7235, "step": 24486 }, { "epoch": 0.8649250073955026, "grad_norm": 1.7832999229431152, "learning_rate": 4.7087471639252813e-07, "loss": 0.7971, "step": 24487 }, { "epoch": 0.8649603291992105, "grad_norm": 1.633537769317627, "learning_rate": 4.7063241293635044e-07, "loss": 0.7324, "step": 24488 }, { "epoch": 0.8649956510029184, "grad_norm": 1.826496958732605, "learning_rate": 4.703901687596851e-07, "loss": 0.8147, "step": 24489 }, { "epoch": 0.8650309728066263, "grad_norm": 1.7875012159347534, "learning_rate": 4.701479838657008e-07, "loss": 0.7612, "step": 24490 }, { "epoch": 0.8650662946103342, "grad_norm": 1.6463978290557861, "learning_rate": 4.6990585825756775e-07, "loss": 0.7606, "step": 24491 }, { "epoch": 0.8651016164140422, "grad_norm": 2.3601019382476807, "learning_rate": 4.696637919384567e-07, "loss": 0.7637, "step": 24492 }, { "epoch": 0.8651369382177501, "grad_norm": 4.850205421447754, "learning_rate": 4.6942178491153344e-07, "loss": 0.7683, "step": 24493 }, { "epoch": 0.865172260021458, "grad_norm": 1.7716418504714966, "learning_rate": 4.691798371799661e-07, "loss": 0.7913, "step": 24494 }, { "epoch": 0.8652075818251659, "grad_norm": 1.648067593574524, "learning_rate": 4.68937948746922e-07, "loss": 0.8112, "step": 24495 }, { "epoch": 0.8652429036288738, "grad_norm": 2.0219225883483887, "learning_rate": 4.686961196155654e-07, "loss": 0.7656, "step": 24496 }, { "epoch": 0.8652782254325817, "grad_norm": 1.773974061012268, "learning_rate": 4.684543497890626e-07, "loss": 0.7911, "step": 24497 }, { "epoch": 0.8653135472362896, "grad_norm": 1.8266654014587402, "learning_rate": 4.6821263927057824e-07, "loss": 0.7696, "step": 24498 }, { "epoch": 0.8653488690399975, "grad_norm": 1.6992181539535522, "learning_rate": 4.679709880632743e-07, "loss": 0.7832, "step": 24499 }, { "epoch": 0.8653841908437054, "grad_norm": 1.7763690948486328, "learning_rate": 4.6772939617031387e-07, "loss": 0.7877, "step": 24500 }, { "epoch": 0.8654195126474133, "grad_norm": 2.2296385765075684, "learning_rate": 4.674878635948593e-07, "loss": 0.8169, "step": 24501 }, { "epoch": 0.8654548344511213, "grad_norm": 1.6691415309906006, "learning_rate": 4.67246390340072e-07, "loss": 0.7425, "step": 24502 }, { "epoch": 0.8654901562548292, "grad_norm": 2.184551954269409, "learning_rate": 4.6700497640911113e-07, "loss": 0.7924, "step": 24503 }, { "epoch": 0.8655254780585371, "grad_norm": 1.8475427627563477, "learning_rate": 4.667636218051369e-07, "loss": 0.7561, "step": 24504 }, { "epoch": 0.865560799862245, "grad_norm": 1.6231056451797485, "learning_rate": 4.6652232653130956e-07, "loss": 0.7157, "step": 24505 }, { "epoch": 0.8655961216659529, "grad_norm": 1.9776325225830078, "learning_rate": 4.662810905907844e-07, "loss": 0.7618, "step": 24506 }, { "epoch": 0.8656314434696608, "grad_norm": 1.7175401449203491, "learning_rate": 4.6603991398672055e-07, "loss": 0.7503, "step": 24507 }, { "epoch": 0.8656667652733687, "grad_norm": 1.712819218635559, "learning_rate": 4.6579879672227434e-07, "loss": 0.7771, "step": 24508 }, { "epoch": 0.8657020870770766, "grad_norm": 2.0415456295013428, "learning_rate": 4.655577388006011e-07, "loss": 0.749, "step": 24509 }, { "epoch": 0.8657374088807845, "grad_norm": 1.7708185911178589, "learning_rate": 4.6531674022485487e-07, "loss": 0.7673, "step": 24510 }, { "epoch": 0.8657727306844925, "grad_norm": 1.686284065246582, "learning_rate": 4.6507580099819206e-07, "loss": 0.7515, "step": 24511 }, { "epoch": 0.8658080524882004, "grad_norm": 1.8340747356414795, "learning_rate": 4.6483492112376403e-07, "loss": 0.7634, "step": 24512 }, { "epoch": 0.8658433742919082, "grad_norm": 1.6327699422836304, "learning_rate": 4.645941006047233e-07, "loss": 0.756, "step": 24513 }, { "epoch": 0.8658786960956161, "grad_norm": 1.8738069534301758, "learning_rate": 4.64353339444224e-07, "loss": 0.7866, "step": 24514 }, { "epoch": 0.865914017899324, "grad_norm": 1.831031322479248, "learning_rate": 4.6411263764541415e-07, "loss": 0.7606, "step": 24515 }, { "epoch": 0.8659493397030319, "grad_norm": 1.7997158765792847, "learning_rate": 4.6387199521144676e-07, "loss": 0.7741, "step": 24516 }, { "epoch": 0.8659846615067398, "grad_norm": 1.686864972114563, "learning_rate": 4.6363141214546935e-07, "loss": 0.7797, "step": 24517 }, { "epoch": 0.8660199833104477, "grad_norm": 1.540304183959961, "learning_rate": 4.633908884506316e-07, "loss": 0.7786, "step": 24518 }, { "epoch": 0.8660553051141556, "grad_norm": 1.8003960847854614, "learning_rate": 4.631504241300805e-07, "loss": 0.7724, "step": 24519 }, { "epoch": 0.8660906269178635, "grad_norm": 1.7790457010269165, "learning_rate": 4.6291001918696344e-07, "loss": 0.7513, "step": 24520 }, { "epoch": 0.8661259487215714, "grad_norm": 1.698906660079956, "learning_rate": 4.626696736244285e-07, "loss": 0.7313, "step": 24521 }, { "epoch": 0.8661612705252794, "grad_norm": 1.7345203161239624, "learning_rate": 4.6242938744561873e-07, "loss": 0.7748, "step": 24522 }, { "epoch": 0.8661965923289873, "grad_norm": 1.6214301586151123, "learning_rate": 4.621891606536805e-07, "loss": 0.7847, "step": 24523 }, { "epoch": 0.8662319141326952, "grad_norm": 1.757373571395874, "learning_rate": 4.6194899325175845e-07, "loss": 0.7674, "step": 24524 }, { "epoch": 0.8662672359364031, "grad_norm": 1.8362255096435547, "learning_rate": 4.6170888524299353e-07, "loss": 0.7772, "step": 24525 }, { "epoch": 0.866302557740111, "grad_norm": 1.6434197425842285, "learning_rate": 4.6146883663053035e-07, "loss": 0.7966, "step": 24526 }, { "epoch": 0.8663378795438189, "grad_norm": 1.8391242027282715, "learning_rate": 4.612288474175103e-07, "loss": 0.7699, "step": 24527 }, { "epoch": 0.8663732013475268, "grad_norm": 1.595674753189087, "learning_rate": 4.609889176070731e-07, "loss": 0.7716, "step": 24528 }, { "epoch": 0.8664085231512347, "grad_norm": 1.7167325019836426, "learning_rate": 4.607490472023596e-07, "loss": 0.7747, "step": 24529 }, { "epoch": 0.8664438449549426, "grad_norm": 1.6921179294586182, "learning_rate": 4.605092362065089e-07, "loss": 0.7653, "step": 24530 }, { "epoch": 0.8664791667586506, "grad_norm": 1.767272710800171, "learning_rate": 4.6026948462266187e-07, "loss": 0.7801, "step": 24531 }, { "epoch": 0.8665144885623585, "grad_norm": 2.0502984523773193, "learning_rate": 4.6002979245395265e-07, "loss": 0.7496, "step": 24532 }, { "epoch": 0.8665498103660664, "grad_norm": 1.6678742170333862, "learning_rate": 4.597901597035204e-07, "loss": 0.7865, "step": 24533 }, { "epoch": 0.8665851321697743, "grad_norm": 1.6978200674057007, "learning_rate": 4.5955058637450203e-07, "loss": 0.8044, "step": 24534 }, { "epoch": 0.8666204539734822, "grad_norm": 1.6779404878616333, "learning_rate": 4.5931107247003227e-07, "loss": 0.7816, "step": 24535 }, { "epoch": 0.8666557757771901, "grad_norm": 1.798210620880127, "learning_rate": 4.5907161799324416e-07, "loss": 0.7941, "step": 24536 }, { "epoch": 0.866691097580898, "grad_norm": 1.7833950519561768, "learning_rate": 4.588322229472747e-07, "loss": 0.8256, "step": 24537 }, { "epoch": 0.8667264193846059, "grad_norm": 1.6471571922302246, "learning_rate": 4.585928873352541e-07, "loss": 0.724, "step": 24538 }, { "epoch": 0.8667617411883137, "grad_norm": 1.6289029121398926, "learning_rate": 4.583536111603165e-07, "loss": 0.7576, "step": 24539 }, { "epoch": 0.8667970629920216, "grad_norm": 1.713531255722046, "learning_rate": 4.581143944255928e-07, "loss": 0.739, "step": 24540 }, { "epoch": 0.8668323847957295, "grad_norm": 1.6271448135375977, "learning_rate": 4.578752371342149e-07, "loss": 0.8009, "step": 24541 }, { "epoch": 0.8668677065994375, "grad_norm": 1.9713490009307861, "learning_rate": 4.576361392893114e-07, "loss": 0.7595, "step": 24542 }, { "epoch": 0.8669030284031454, "grad_norm": 2.141103506088257, "learning_rate": 4.57397100894012e-07, "loss": 0.7202, "step": 24543 }, { "epoch": 0.8669383502068533, "grad_norm": 1.7076737880706787, "learning_rate": 4.571581219514465e-07, "loss": 0.7752, "step": 24544 }, { "epoch": 0.8669736720105612, "grad_norm": 1.5774611234664917, "learning_rate": 4.569192024647406e-07, "loss": 0.7552, "step": 24545 }, { "epoch": 0.8670089938142691, "grad_norm": 1.6280755996704102, "learning_rate": 4.5668034243702253e-07, "loss": 0.7642, "step": 24546 }, { "epoch": 0.867044315617977, "grad_norm": 1.6605618000030518, "learning_rate": 4.5644154187141856e-07, "loss": 0.749, "step": 24547 }, { "epoch": 0.8670796374216849, "grad_norm": 1.7788724899291992, "learning_rate": 4.5620280077105284e-07, "loss": 0.7439, "step": 24548 }, { "epoch": 0.8671149592253928, "grad_norm": 1.7219663858413696, "learning_rate": 4.559641191390507e-07, "loss": 0.7559, "step": 24549 }, { "epoch": 0.8671502810291007, "grad_norm": 1.631944179534912, "learning_rate": 4.5572549697853686e-07, "loss": 0.7391, "step": 24550 }, { "epoch": 0.8671856028328087, "grad_norm": 2.6730427742004395, "learning_rate": 4.554869342926327e-07, "loss": 0.7627, "step": 24551 }, { "epoch": 0.8672209246365166, "grad_norm": 3.348267078399658, "learning_rate": 4.552484310844613e-07, "loss": 0.7487, "step": 24552 }, { "epoch": 0.8672562464402245, "grad_norm": 1.7110034227371216, "learning_rate": 4.550099873571445e-07, "loss": 0.7727, "step": 24553 }, { "epoch": 0.8672915682439324, "grad_norm": 1.7763574123382568, "learning_rate": 4.547716031138033e-07, "loss": 0.813, "step": 24554 }, { "epoch": 0.8673268900476403, "grad_norm": 1.7716522216796875, "learning_rate": 4.5453327835755565e-07, "loss": 0.7282, "step": 24555 }, { "epoch": 0.8673622118513482, "grad_norm": 5.505316734313965, "learning_rate": 4.542950130915219e-07, "loss": 0.7725, "step": 24556 }, { "epoch": 0.8673975336550561, "grad_norm": 1.7459850311279297, "learning_rate": 4.5405680731882176e-07, "loss": 0.7621, "step": 24557 }, { "epoch": 0.867432855458764, "grad_norm": 0.9186959266662598, "learning_rate": 4.5381866104257043e-07, "loss": 0.5777, "step": 24558 }, { "epoch": 0.8674681772624719, "grad_norm": 1.8469141721725464, "learning_rate": 4.535805742658861e-07, "loss": 0.7957, "step": 24559 }, { "epoch": 0.8675034990661799, "grad_norm": 1.6616791486740112, "learning_rate": 4.533425469918851e-07, "loss": 0.7691, "step": 24560 }, { "epoch": 0.8675388208698878, "grad_norm": 1.7298752069473267, "learning_rate": 4.5310457922368154e-07, "loss": 0.7721, "step": 24561 }, { "epoch": 0.8675741426735957, "grad_norm": 1.645869493484497, "learning_rate": 4.5286667096439084e-07, "loss": 0.7248, "step": 24562 }, { "epoch": 0.8676094644773036, "grad_norm": 1.9771946668624878, "learning_rate": 4.526288222171266e-07, "loss": 0.7661, "step": 24563 }, { "epoch": 0.8676447862810115, "grad_norm": 1.8195136785507202, "learning_rate": 4.523910329850012e-07, "loss": 0.7794, "step": 24564 }, { "epoch": 0.8676801080847193, "grad_norm": 1.6953822374343872, "learning_rate": 4.521533032711267e-07, "loss": 0.764, "step": 24565 }, { "epoch": 0.8677154298884272, "grad_norm": 1.7112877368927002, "learning_rate": 4.5191563307861617e-07, "loss": 0.7858, "step": 24566 }, { "epoch": 0.8677507516921351, "grad_norm": 1.8200639486312866, "learning_rate": 4.516780224105777e-07, "loss": 0.7684, "step": 24567 }, { "epoch": 0.867786073495843, "grad_norm": 1.5629080533981323, "learning_rate": 4.514404712701226e-07, "loss": 0.7715, "step": 24568 }, { "epoch": 0.8678213952995509, "grad_norm": 1.9063239097595215, "learning_rate": 4.512029796603601e-07, "loss": 0.7773, "step": 24569 }, { "epoch": 0.8678567171032588, "grad_norm": 1.6489710807800293, "learning_rate": 4.5096554758439716e-07, "loss": 0.7395, "step": 24570 }, { "epoch": 0.8678920389069668, "grad_norm": 1.6757497787475586, "learning_rate": 4.5072817504534184e-07, "loss": 0.7279, "step": 24571 }, { "epoch": 0.8679273607106747, "grad_norm": 1.7323682308197021, "learning_rate": 4.504908620463022e-07, "loss": 0.7813, "step": 24572 }, { "epoch": 0.8679626825143826, "grad_norm": 1.743376612663269, "learning_rate": 4.5025360859038245e-07, "loss": 0.7773, "step": 24573 }, { "epoch": 0.8679980043180905, "grad_norm": 1.5654772520065308, "learning_rate": 4.500164146806879e-07, "loss": 0.7694, "step": 24574 }, { "epoch": 0.8680333261217984, "grad_norm": 1.800539493560791, "learning_rate": 4.497792803203227e-07, "loss": 0.7974, "step": 24575 }, { "epoch": 0.8680686479255063, "grad_norm": 2.0466253757476807, "learning_rate": 4.4954220551239214e-07, "loss": 0.7708, "step": 24576 }, { "epoch": 0.8681039697292142, "grad_norm": 1.6978119611740112, "learning_rate": 4.493051902599971e-07, "loss": 0.7482, "step": 24577 }, { "epoch": 0.8681392915329221, "grad_norm": 1.8292843103408813, "learning_rate": 4.4906823456624e-07, "loss": 0.7617, "step": 24578 }, { "epoch": 0.86817461333663, "grad_norm": 1.773862361907959, "learning_rate": 4.4883133843422356e-07, "loss": 0.7769, "step": 24579 }, { "epoch": 0.868209935140338, "grad_norm": 1.7201398611068726, "learning_rate": 4.485945018670457e-07, "loss": 0.7469, "step": 24580 }, { "epoch": 0.8682452569440459, "grad_norm": 1.7716330289840698, "learning_rate": 4.4835772486780783e-07, "loss": 0.7783, "step": 24581 }, { "epoch": 0.8682805787477538, "grad_norm": 1.783380389213562, "learning_rate": 4.4812100743960917e-07, "loss": 0.7936, "step": 24582 }, { "epoch": 0.8683159005514617, "grad_norm": 1.878050684928894, "learning_rate": 4.4788434958554616e-07, "loss": 0.7897, "step": 24583 }, { "epoch": 0.8683512223551696, "grad_norm": 1.7093617916107178, "learning_rate": 4.4764775130871684e-07, "loss": 0.7625, "step": 24584 }, { "epoch": 0.8683865441588775, "grad_norm": 1.7452385425567627, "learning_rate": 4.4741121261221933e-07, "loss": 0.7605, "step": 24585 }, { "epoch": 0.8684218659625854, "grad_norm": 1.6743932962417603, "learning_rate": 4.4717473349914663e-07, "loss": 0.7787, "step": 24586 }, { "epoch": 0.8684571877662933, "grad_norm": 1.6180962324142456, "learning_rate": 4.469383139725958e-07, "loss": 0.7269, "step": 24587 }, { "epoch": 0.8684925095700012, "grad_norm": 1.7637956142425537, "learning_rate": 4.4670195403565987e-07, "loss": 0.7856, "step": 24588 }, { "epoch": 0.8685278313737091, "grad_norm": 1.8659271001815796, "learning_rate": 4.464656536914341e-07, "loss": 0.7639, "step": 24589 }, { "epoch": 0.8685631531774171, "grad_norm": 1.7689203023910522, "learning_rate": 4.4622941294301e-07, "loss": 0.7776, "step": 24590 }, { "epoch": 0.8685984749811249, "grad_norm": 1.6506297588348389, "learning_rate": 4.459932317934784e-07, "loss": 0.7305, "step": 24591 }, { "epoch": 0.8686337967848328, "grad_norm": 1.667073369026184, "learning_rate": 4.457571102459318e-07, "loss": 0.7517, "step": 24592 }, { "epoch": 0.8686691185885407, "grad_norm": 1.6422772407531738, "learning_rate": 4.455210483034594e-07, "loss": 0.7763, "step": 24593 }, { "epoch": 0.8687044403922486, "grad_norm": 1.7385269403457642, "learning_rate": 4.4528504596915155e-07, "loss": 0.7697, "step": 24594 }, { "epoch": 0.8687397621959565, "grad_norm": 1.637100338935852, "learning_rate": 4.450491032460974e-07, "loss": 0.7689, "step": 24595 }, { "epoch": 0.8687750839996644, "grad_norm": 1.8436774015426636, "learning_rate": 4.4481322013738393e-07, "loss": 0.7722, "step": 24596 }, { "epoch": 0.8688104058033723, "grad_norm": 1.5886764526367188, "learning_rate": 4.445773966460981e-07, "loss": 0.7653, "step": 24597 }, { "epoch": 0.8688457276070802, "grad_norm": 1.6521248817443848, "learning_rate": 4.443416327753275e-07, "loss": 0.7513, "step": 24598 }, { "epoch": 0.8688810494107881, "grad_norm": 1.6034315824508667, "learning_rate": 4.4410592852815794e-07, "loss": 0.7534, "step": 24599 }, { "epoch": 0.868916371214496, "grad_norm": 1.8936891555786133, "learning_rate": 4.438702839076731e-07, "loss": 0.7431, "step": 24600 }, { "epoch": 0.868951693018204, "grad_norm": 1.770073652267456, "learning_rate": 4.4363469891695653e-07, "loss": 0.7472, "step": 24601 }, { "epoch": 0.8689870148219119, "grad_norm": 1.6237428188323975, "learning_rate": 4.4339917355909424e-07, "loss": 0.7407, "step": 24602 }, { "epoch": 0.8690223366256198, "grad_norm": 1.6162865161895752, "learning_rate": 4.4316370783716587e-07, "loss": 0.745, "step": 24603 }, { "epoch": 0.8690576584293277, "grad_norm": 1.736417293548584, "learning_rate": 4.429283017542546e-07, "loss": 0.7484, "step": 24604 }, { "epoch": 0.8690929802330356, "grad_norm": 2.2552590370178223, "learning_rate": 4.4269295531344123e-07, "loss": 0.7493, "step": 24605 }, { "epoch": 0.8691283020367435, "grad_norm": 1.8453513383865356, "learning_rate": 4.424576685178056e-07, "loss": 0.7551, "step": 24606 }, { "epoch": 0.8691636238404514, "grad_norm": 1.7244229316711426, "learning_rate": 4.422224413704268e-07, "loss": 0.7235, "step": 24607 }, { "epoch": 0.8691989456441593, "grad_norm": 1.856172800064087, "learning_rate": 4.4198727387438524e-07, "loss": 0.7891, "step": 24608 }, { "epoch": 0.8692342674478672, "grad_norm": 1.7941151857376099, "learning_rate": 4.4175216603275727e-07, "loss": 0.7599, "step": 24609 }, { "epoch": 0.8692695892515752, "grad_norm": 1.66016685962677, "learning_rate": 4.415171178486194e-07, "loss": 0.7587, "step": 24610 }, { "epoch": 0.8693049110552831, "grad_norm": 1.5859904289245605, "learning_rate": 4.41282129325048e-07, "loss": 0.7321, "step": 24611 }, { "epoch": 0.869340232858991, "grad_norm": 1.7200804948806763, "learning_rate": 4.410472004651206e-07, "loss": 0.7595, "step": 24612 }, { "epoch": 0.8693755546626989, "grad_norm": 1.7589192390441895, "learning_rate": 4.4081233127190925e-07, "loss": 0.7909, "step": 24613 }, { "epoch": 0.8694108764664068, "grad_norm": 1.6150552034378052, "learning_rate": 4.4057752174848924e-07, "loss": 0.7766, "step": 24614 }, { "epoch": 0.8694461982701147, "grad_norm": 1.805323839187622, "learning_rate": 4.4034277189793415e-07, "loss": 0.7468, "step": 24615 }, { "epoch": 0.8694815200738226, "grad_norm": 2.050093650817871, "learning_rate": 4.4010808172331496e-07, "loss": 0.7957, "step": 24616 }, { "epoch": 0.8695168418775304, "grad_norm": 1.6198827028274536, "learning_rate": 4.398734512277042e-07, "loss": 0.734, "step": 24617 }, { "epoch": 0.8695521636812383, "grad_norm": 1.8330248594284058, "learning_rate": 4.396388804141738e-07, "loss": 0.796, "step": 24618 }, { "epoch": 0.8695874854849462, "grad_norm": 1.7363169193267822, "learning_rate": 4.3940436928579077e-07, "loss": 0.7588, "step": 24619 }, { "epoch": 0.8696228072886542, "grad_norm": 2.0656626224517822, "learning_rate": 4.3916991784562655e-07, "loss": 0.726, "step": 24620 }, { "epoch": 0.8696581290923621, "grad_norm": 1.6513283252716064, "learning_rate": 4.389355260967498e-07, "loss": 0.7431, "step": 24621 }, { "epoch": 0.86969345089607, "grad_norm": 1.7611247301101685, "learning_rate": 4.3870119404222647e-07, "loss": 0.7597, "step": 24622 }, { "epoch": 0.8697287726997779, "grad_norm": 1.8294445276260376, "learning_rate": 4.3846692168512507e-07, "loss": 0.7622, "step": 24623 }, { "epoch": 0.8697640945034858, "grad_norm": 2.4498543739318848, "learning_rate": 4.3823270902851156e-07, "loss": 0.7546, "step": 24624 }, { "epoch": 0.8697994163071937, "grad_norm": 1.8384946584701538, "learning_rate": 4.379985560754496e-07, "loss": 0.7191, "step": 24625 }, { "epoch": 0.8698347381109016, "grad_norm": 1.6510564088821411, "learning_rate": 4.3776446282900566e-07, "loss": 0.742, "step": 24626 }, { "epoch": 0.8698700599146095, "grad_norm": 1.7632166147232056, "learning_rate": 4.375304292922433e-07, "loss": 0.7454, "step": 24627 }, { "epoch": 0.8699053817183174, "grad_norm": 1.7496974468231201, "learning_rate": 4.372964554682252e-07, "loss": 0.7659, "step": 24628 }, { "epoch": 0.8699407035220253, "grad_norm": 1.5967552661895752, "learning_rate": 4.370625413600127e-07, "loss": 0.7741, "step": 24629 }, { "epoch": 0.8699760253257333, "grad_norm": 1.758929967880249, "learning_rate": 4.368286869706673e-07, "loss": 0.7707, "step": 24630 }, { "epoch": 0.8700113471294412, "grad_norm": 1.789279580116272, "learning_rate": 4.365948923032515e-07, "loss": 0.752, "step": 24631 }, { "epoch": 0.8700466689331491, "grad_norm": 1.6740435361862183, "learning_rate": 4.363611573608234e-07, "loss": 0.7733, "step": 24632 }, { "epoch": 0.870081990736857, "grad_norm": 1.682949185371399, "learning_rate": 4.361274821464423e-07, "loss": 0.8003, "step": 24633 }, { "epoch": 0.8701173125405649, "grad_norm": 1.7375049591064453, "learning_rate": 4.3589386666316736e-07, "loss": 0.7621, "step": 24634 }, { "epoch": 0.8701526343442728, "grad_norm": 1.718148946762085, "learning_rate": 4.3566031091405505e-07, "loss": 0.7644, "step": 24635 }, { "epoch": 0.8701879561479807, "grad_norm": 1.6747876405715942, "learning_rate": 4.354268149021623e-07, "loss": 0.755, "step": 24636 }, { "epoch": 0.8702232779516886, "grad_norm": 1.6407514810562134, "learning_rate": 4.351933786305468e-07, "loss": 0.7425, "step": 24637 }, { "epoch": 0.8702585997553965, "grad_norm": 1.957862138748169, "learning_rate": 4.3496000210226094e-07, "loss": 0.7676, "step": 24638 }, { "epoch": 0.8702939215591045, "grad_norm": 1.774156093597412, "learning_rate": 4.347266853203602e-07, "loss": 0.8198, "step": 24639 }, { "epoch": 0.8703292433628124, "grad_norm": 2.6049437522888184, "learning_rate": 4.3449342828789984e-07, "loss": 0.7553, "step": 24640 }, { "epoch": 0.8703645651665203, "grad_norm": 1.6393424272537231, "learning_rate": 4.3426023100793026e-07, "loss": 0.7626, "step": 24641 }, { "epoch": 0.8703998869702282, "grad_norm": 1.8313920497894287, "learning_rate": 4.340270934835045e-07, "loss": 0.7926, "step": 24642 }, { "epoch": 0.870435208773936, "grad_norm": 1.5437490940093994, "learning_rate": 4.3379401571767453e-07, "loss": 0.7114, "step": 24643 }, { "epoch": 0.8704705305776439, "grad_norm": 1.7127258777618408, "learning_rate": 4.3356099771348914e-07, "loss": 0.8122, "step": 24644 }, { "epoch": 0.8705058523813518, "grad_norm": 1.6682111024856567, "learning_rate": 4.3332803947399917e-07, "loss": 0.7492, "step": 24645 }, { "epoch": 0.8705411741850597, "grad_norm": 1.750684142112732, "learning_rate": 4.3309514100225436e-07, "loss": 0.7649, "step": 24646 }, { "epoch": 0.8705764959887676, "grad_norm": 13.013994216918945, "learning_rate": 4.3286230230130175e-07, "loss": 0.7516, "step": 24647 }, { "epoch": 0.8706118177924755, "grad_norm": 1.5372819900512695, "learning_rate": 4.326295233741884e-07, "loss": 0.7422, "step": 24648 }, { "epoch": 0.8706471395961835, "grad_norm": 1.747578501701355, "learning_rate": 4.323968042239607e-07, "loss": 0.7333, "step": 24649 }, { "epoch": 0.8706824613998914, "grad_norm": 1.7482153177261353, "learning_rate": 4.321641448536662e-07, "loss": 0.7444, "step": 24650 }, { "epoch": 0.8707177832035993, "grad_norm": 1.7157636880874634, "learning_rate": 4.319315452663475e-07, "loss": 0.7553, "step": 24651 }, { "epoch": 0.8707531050073072, "grad_norm": 2.303226947784424, "learning_rate": 4.3169900546504996e-07, "loss": 0.7758, "step": 24652 }, { "epoch": 0.8707884268110151, "grad_norm": 1.6597261428833008, "learning_rate": 4.314665254528183e-07, "loss": 0.7568, "step": 24653 }, { "epoch": 0.870823748614723, "grad_norm": 1.6973938941955566, "learning_rate": 4.312341052326929e-07, "loss": 0.7573, "step": 24654 }, { "epoch": 0.8708590704184309, "grad_norm": 1.7710953950881958, "learning_rate": 4.3100174480771697e-07, "loss": 0.7646, "step": 24655 }, { "epoch": 0.8708943922221388, "grad_norm": 1.6762899160385132, "learning_rate": 4.3076944418093125e-07, "loss": 0.7599, "step": 24656 }, { "epoch": 0.8709297140258467, "grad_norm": 1.755340576171875, "learning_rate": 4.3053720335537676e-07, "loss": 0.7305, "step": 24657 }, { "epoch": 0.8709650358295546, "grad_norm": 1.6688673496246338, "learning_rate": 4.303050223340921e-07, "loss": 0.7721, "step": 24658 }, { "epoch": 0.8710003576332626, "grad_norm": 1.5760959386825562, "learning_rate": 4.3007290112011544e-07, "loss": 0.7253, "step": 24659 }, { "epoch": 0.8710356794369705, "grad_norm": 3.1035964488983154, "learning_rate": 4.2984083971648713e-07, "loss": 0.7381, "step": 24660 }, { "epoch": 0.8710710012406784, "grad_norm": 1.67249596118927, "learning_rate": 4.2960883812624143e-07, "loss": 0.7616, "step": 24661 }, { "epoch": 0.8711063230443863, "grad_norm": 1.6963249444961548, "learning_rate": 4.2937689635241644e-07, "loss": 0.7805, "step": 24662 }, { "epoch": 0.8711416448480942, "grad_norm": 1.746077060699463, "learning_rate": 4.2914501439804854e-07, "loss": 0.8013, "step": 24663 }, { "epoch": 0.8711769666518021, "grad_norm": 1.6665054559707642, "learning_rate": 4.2891319226617103e-07, "loss": 0.7566, "step": 24664 }, { "epoch": 0.87121228845551, "grad_norm": 1.581432580947876, "learning_rate": 4.2868142995981797e-07, "loss": 0.7595, "step": 24665 }, { "epoch": 0.8712476102592179, "grad_norm": 1.7528009414672852, "learning_rate": 4.2844972748202264e-07, "loss": 0.7354, "step": 24666 }, { "epoch": 0.8712829320629258, "grad_norm": 2.1456298828125, "learning_rate": 4.282180848358186e-07, "loss": 0.7626, "step": 24667 }, { "epoch": 0.8713182538666338, "grad_norm": 1.6707125902175903, "learning_rate": 4.279865020242363e-07, "loss": 0.7824, "step": 24668 }, { "epoch": 0.8713535756703416, "grad_norm": 1.7535815238952637, "learning_rate": 4.2775497905030715e-07, "loss": 0.7777, "step": 24669 }, { "epoch": 0.8713888974740495, "grad_norm": 1.6142138242721558, "learning_rate": 4.2752351591706266e-07, "loss": 0.7539, "step": 24670 }, { "epoch": 0.8714242192777574, "grad_norm": 1.8019357919692993, "learning_rate": 4.272921126275298e-07, "loss": 0.7498, "step": 24671 }, { "epoch": 0.8714595410814653, "grad_norm": 1.7444523572921753, "learning_rate": 4.2706076918473783e-07, "loss": 0.8115, "step": 24672 }, { "epoch": 0.8714948628851732, "grad_norm": 1.8355293273925781, "learning_rate": 4.26829485591716e-07, "loss": 0.7564, "step": 24673 }, { "epoch": 0.8715301846888811, "grad_norm": 1.6846234798431396, "learning_rate": 4.265982618514891e-07, "loss": 0.7671, "step": 24674 }, { "epoch": 0.871565506492589, "grad_norm": 1.6892114877700806, "learning_rate": 4.2636709796708476e-07, "loss": 0.76, "step": 24675 }, { "epoch": 0.8716008282962969, "grad_norm": 1.6735718250274658, "learning_rate": 4.2613599394152884e-07, "loss": 0.7439, "step": 24676 }, { "epoch": 0.8716361501000048, "grad_norm": 1.948307991027832, "learning_rate": 4.2590494977784447e-07, "loss": 0.7605, "step": 24677 }, { "epoch": 0.8716714719037127, "grad_norm": 1.8847148418426514, "learning_rate": 4.256739654790559e-07, "loss": 0.7486, "step": 24678 }, { "epoch": 0.8717067937074207, "grad_norm": 2.2246525287628174, "learning_rate": 4.2544304104818744e-07, "loss": 0.7498, "step": 24679 }, { "epoch": 0.8717421155111286, "grad_norm": 1.708781361579895, "learning_rate": 4.2521217648825995e-07, "loss": 0.7695, "step": 24680 }, { "epoch": 0.8717774373148365, "grad_norm": 1.7353951930999756, "learning_rate": 4.2498137180229604e-07, "loss": 0.7787, "step": 24681 }, { "epoch": 0.8718127591185444, "grad_norm": 1.6833879947662354, "learning_rate": 4.2475062699331605e-07, "loss": 0.798, "step": 24682 }, { "epoch": 0.8718480809222523, "grad_norm": 1.9068306684494019, "learning_rate": 4.2451994206433977e-07, "loss": 0.7519, "step": 24683 }, { "epoch": 0.8718834027259602, "grad_norm": 2.028273820877075, "learning_rate": 4.242893170183859e-07, "loss": 0.7724, "step": 24684 }, { "epoch": 0.8719187245296681, "grad_norm": 1.6995238065719604, "learning_rate": 4.240587518584732e-07, "loss": 0.7738, "step": 24685 }, { "epoch": 0.871954046333376, "grad_norm": 1.7227147817611694, "learning_rate": 4.2382824658762037e-07, "loss": 0.7568, "step": 24686 }, { "epoch": 0.8719893681370839, "grad_norm": 1.6762382984161377, "learning_rate": 4.235978012088421e-07, "loss": 0.7677, "step": 24687 }, { "epoch": 0.8720246899407919, "grad_norm": 1.8018730878829956, "learning_rate": 4.2336741572515603e-07, "loss": 0.7639, "step": 24688 }, { "epoch": 0.8720600117444998, "grad_norm": 1.69221830368042, "learning_rate": 4.2313709013957705e-07, "loss": 0.7784, "step": 24689 }, { "epoch": 0.8720953335482077, "grad_norm": 1.5883790254592896, "learning_rate": 4.229068244551193e-07, "loss": 0.764, "step": 24690 }, { "epoch": 0.8721306553519156, "grad_norm": 1.7711342573165894, "learning_rate": 4.2267661867479603e-07, "loss": 0.8138, "step": 24691 }, { "epoch": 0.8721659771556235, "grad_norm": 1.696531057357788, "learning_rate": 4.2244647280162196e-07, "loss": 0.7455, "step": 24692 }, { "epoch": 0.8722012989593314, "grad_norm": 1.7242658138275146, "learning_rate": 4.22216386838607e-07, "loss": 0.7305, "step": 24693 }, { "epoch": 0.8722366207630393, "grad_norm": 1.8314099311828613, "learning_rate": 4.2198636078876366e-07, "loss": 0.775, "step": 24694 }, { "epoch": 0.8722719425667471, "grad_norm": 1.8505239486694336, "learning_rate": 4.217563946551029e-07, "loss": 0.7432, "step": 24695 }, { "epoch": 0.872307264370455, "grad_norm": 1.5845496654510498, "learning_rate": 4.215264884406328e-07, "loss": 0.7598, "step": 24696 }, { "epoch": 0.8723425861741629, "grad_norm": 1.7158100605010986, "learning_rate": 4.212966421483633e-07, "loss": 0.7467, "step": 24697 }, { "epoch": 0.8723779079778708, "grad_norm": 1.6359779834747314, "learning_rate": 4.210668557813041e-07, "loss": 0.7483, "step": 24698 }, { "epoch": 0.8724132297815788, "grad_norm": 1.703108787536621, "learning_rate": 4.2083712934245957e-07, "loss": 0.8031, "step": 24699 }, { "epoch": 0.8724485515852867, "grad_norm": 1.8379460573196411, "learning_rate": 4.2060746283483833e-07, "loss": 0.8233, "step": 24700 }, { "epoch": 0.8724838733889946, "grad_norm": 1.6555930376052856, "learning_rate": 4.203778562614469e-07, "loss": 0.7649, "step": 24701 }, { "epoch": 0.8725191951927025, "grad_norm": 1.877004623413086, "learning_rate": 4.2014830962528895e-07, "loss": 0.7791, "step": 24702 }, { "epoch": 0.8725545169964104, "grad_norm": 1.6221171617507935, "learning_rate": 4.199188229293677e-07, "loss": 0.7698, "step": 24703 }, { "epoch": 0.8725898388001183, "grad_norm": 1.8578732013702393, "learning_rate": 4.196893961766885e-07, "loss": 0.7966, "step": 24704 }, { "epoch": 0.8726251606038262, "grad_norm": 1.685693383216858, "learning_rate": 4.1946002937025443e-07, "loss": 0.7716, "step": 24705 }, { "epoch": 0.8726604824075341, "grad_norm": 1.9733318090438843, "learning_rate": 4.1923072251306485e-07, "loss": 0.7812, "step": 24706 }, { "epoch": 0.872695804211242, "grad_norm": 1.776991844177246, "learning_rate": 4.190014756081229e-07, "loss": 0.7509, "step": 24707 }, { "epoch": 0.87273112601495, "grad_norm": 1.6963540315628052, "learning_rate": 4.1877228865842944e-07, "loss": 0.7651, "step": 24708 }, { "epoch": 0.8727664478186579, "grad_norm": 2.1201412677764893, "learning_rate": 4.185431616669822e-07, "loss": 0.7485, "step": 24709 }, { "epoch": 0.8728017696223658, "grad_norm": 1.6180306673049927, "learning_rate": 4.1831409463678086e-07, "loss": 0.7544, "step": 24710 }, { "epoch": 0.8728370914260737, "grad_norm": 1.7235783338546753, "learning_rate": 4.180850875708242e-07, "loss": 0.7928, "step": 24711 }, { "epoch": 0.8728724132297816, "grad_norm": 1.7368708848953247, "learning_rate": 4.178561404721082e-07, "loss": 0.7434, "step": 24712 }, { "epoch": 0.8729077350334895, "grad_norm": 1.5269944667816162, "learning_rate": 4.176272533436293e-07, "loss": 0.7357, "step": 24713 }, { "epoch": 0.8729430568371974, "grad_norm": 1.9880845546722412, "learning_rate": 4.1739842618838344e-07, "loss": 0.7399, "step": 24714 }, { "epoch": 0.8729783786409053, "grad_norm": 2.6327710151672363, "learning_rate": 4.17169659009366e-07, "loss": 0.757, "step": 24715 }, { "epoch": 0.8730137004446132, "grad_norm": 1.5462709665298462, "learning_rate": 4.169409518095702e-07, "loss": 0.7358, "step": 24716 }, { "epoch": 0.8730490222483211, "grad_norm": 1.6071280241012573, "learning_rate": 4.1671230459198966e-07, "loss": 0.7611, "step": 24717 }, { "epoch": 0.8730843440520291, "grad_norm": 1.5805659294128418, "learning_rate": 4.164837173596181e-07, "loss": 0.7541, "step": 24718 }, { "epoch": 0.873119665855737, "grad_norm": 2.486692428588867, "learning_rate": 4.162551901154449e-07, "loss": 0.7425, "step": 24719 }, { "epoch": 0.8731549876594449, "grad_norm": 1.6929186582565308, "learning_rate": 4.1602672286246257e-07, "loss": 0.7808, "step": 24720 }, { "epoch": 0.8731903094631527, "grad_norm": 2.036151647567749, "learning_rate": 4.1579831560366157e-07, "loss": 0.7648, "step": 24721 }, { "epoch": 0.8732256312668606, "grad_norm": 3.927995443344116, "learning_rate": 4.155699683420289e-07, "loss": 0.7295, "step": 24722 }, { "epoch": 0.8732609530705685, "grad_norm": 1.7854162454605103, "learning_rate": 4.153416810805555e-07, "loss": 0.7878, "step": 24723 }, { "epoch": 0.8732962748742764, "grad_norm": 1.5189275741577148, "learning_rate": 4.151134538222279e-07, "loss": 0.7564, "step": 24724 }, { "epoch": 0.8733315966779843, "grad_norm": 1.6366890668869019, "learning_rate": 4.1488528657003425e-07, "loss": 0.7272, "step": 24725 }, { "epoch": 0.8733669184816922, "grad_norm": 1.6651535034179688, "learning_rate": 4.146571793269594e-07, "loss": 0.7513, "step": 24726 }, { "epoch": 0.8734022402854001, "grad_norm": 1.6328872442245483, "learning_rate": 4.1442913209598923e-07, "loss": 0.7651, "step": 24727 }, { "epoch": 0.873437562089108, "grad_norm": 1.5771417617797852, "learning_rate": 4.1420114488010975e-07, "loss": 0.7694, "step": 24728 }, { "epoch": 0.873472883892816, "grad_norm": 1.802451491355896, "learning_rate": 4.1397321768230247e-07, "loss": 0.777, "step": 24729 }, { "epoch": 0.8735082056965239, "grad_norm": 1.638562798500061, "learning_rate": 4.1374535050555164e-07, "loss": 0.7731, "step": 24730 }, { "epoch": 0.8735435275002318, "grad_norm": 1.581260323524475, "learning_rate": 4.13517543352841e-07, "loss": 0.7312, "step": 24731 }, { "epoch": 0.8735788493039397, "grad_norm": 1.8899821043014526, "learning_rate": 4.1328979622714927e-07, "loss": 0.7739, "step": 24732 }, { "epoch": 0.8736141711076476, "grad_norm": 1.7181016206741333, "learning_rate": 4.130621091314585e-07, "loss": 0.7689, "step": 24733 }, { "epoch": 0.8736494929113555, "grad_norm": 1.7967768907546997, "learning_rate": 4.128344820687491e-07, "loss": 0.7459, "step": 24734 }, { "epoch": 0.8736848147150634, "grad_norm": 1.626713752746582, "learning_rate": 4.1260691504199925e-07, "loss": 0.7337, "step": 24735 }, { "epoch": 0.8737201365187713, "grad_norm": 1.69413161277771, "learning_rate": 4.123794080541876e-07, "loss": 0.7398, "step": 24736 }, { "epoch": 0.8737554583224793, "grad_norm": 1.6475797891616821, "learning_rate": 4.12151961108293e-07, "loss": 0.7474, "step": 24737 }, { "epoch": 0.8737907801261872, "grad_norm": 2.0062222480773926, "learning_rate": 4.119245742072914e-07, "loss": 0.7741, "step": 24738 }, { "epoch": 0.8738261019298951, "grad_norm": 1.600980520248413, "learning_rate": 4.1169724735415695e-07, "loss": 0.7136, "step": 24739 }, { "epoch": 0.873861423733603, "grad_norm": 1.8079725503921509, "learning_rate": 4.1146998055186684e-07, "loss": 0.7835, "step": 24740 }, { "epoch": 0.8738967455373109, "grad_norm": 1.8837884664535522, "learning_rate": 4.1124277380339584e-07, "loss": 0.7737, "step": 24741 }, { "epoch": 0.8739320673410188, "grad_norm": 1.5659087896347046, "learning_rate": 4.110156271117155e-07, "loss": 0.7433, "step": 24742 }, { "epoch": 0.8739673891447267, "grad_norm": 1.74030339717865, "learning_rate": 4.1078854047980063e-07, "loss": 0.779, "step": 24743 }, { "epoch": 0.8740027109484346, "grad_norm": 1.717881679534912, "learning_rate": 4.105615139106234e-07, "loss": 0.7516, "step": 24744 }, { "epoch": 0.8740380327521425, "grad_norm": 1.570588231086731, "learning_rate": 4.103345474071535e-07, "loss": 0.7572, "step": 24745 }, { "epoch": 0.8740733545558504, "grad_norm": 1.5977201461791992, "learning_rate": 4.10107640972362e-07, "loss": 0.7579, "step": 24746 }, { "epoch": 0.8741086763595582, "grad_norm": 1.7187129259109497, "learning_rate": 4.098807946092198e-07, "loss": 0.7746, "step": 24747 }, { "epoch": 0.8741439981632662, "grad_norm": 1.5763357877731323, "learning_rate": 4.0965400832069456e-07, "loss": 0.7786, "step": 24748 }, { "epoch": 0.8741793199669741, "grad_norm": 1.607475757598877, "learning_rate": 4.094272821097539e-07, "loss": 0.77, "step": 24749 }, { "epoch": 0.874214641770682, "grad_norm": 1.6931850910186768, "learning_rate": 4.0920061597936765e-07, "loss": 0.7519, "step": 24750 }, { "epoch": 0.8742499635743899, "grad_norm": 1.6547906398773193, "learning_rate": 4.089740099324996e-07, "loss": 0.7245, "step": 24751 }, { "epoch": 0.8742852853780978, "grad_norm": 1.6818984746932983, "learning_rate": 4.0874746397211675e-07, "loss": 0.777, "step": 24752 }, { "epoch": 0.8743206071818057, "grad_norm": 1.7173614501953125, "learning_rate": 4.085209781011851e-07, "loss": 0.7444, "step": 24753 }, { "epoch": 0.8743559289855136, "grad_norm": 1.7962397336959839, "learning_rate": 4.0829455232266667e-07, "loss": 0.7584, "step": 24754 }, { "epoch": 0.8743912507892215, "grad_norm": 1.7954072952270508, "learning_rate": 4.080681866395259e-07, "loss": 0.7352, "step": 24755 }, { "epoch": 0.8744265725929294, "grad_norm": 1.653625249862671, "learning_rate": 4.0784188105472635e-07, "loss": 0.7673, "step": 24756 }, { "epoch": 0.8744618943966374, "grad_norm": 1.9188123941421509, "learning_rate": 4.0761563557122906e-07, "loss": 0.7703, "step": 24757 }, { "epoch": 0.8744972162003453, "grad_norm": 1.6990184783935547, "learning_rate": 4.073894501919945e-07, "loss": 0.7208, "step": 24758 }, { "epoch": 0.8745325380040532, "grad_norm": 1.8737668991088867, "learning_rate": 4.0716332491998354e-07, "loss": 0.7825, "step": 24759 }, { "epoch": 0.8745678598077611, "grad_norm": 1.8692106008529663, "learning_rate": 4.0693725975815614e-07, "loss": 0.7518, "step": 24760 }, { "epoch": 0.874603181611469, "grad_norm": 1.7685978412628174, "learning_rate": 4.0671125470946984e-07, "loss": 0.764, "step": 24761 }, { "epoch": 0.8746385034151769, "grad_norm": 1.6869255304336548, "learning_rate": 4.0648530977688286e-07, "loss": 0.7617, "step": 24762 }, { "epoch": 0.8746738252188848, "grad_norm": 1.4876128435134888, "learning_rate": 4.0625942496335393e-07, "loss": 0.7221, "step": 24763 }, { "epoch": 0.8747091470225927, "grad_norm": 1.7100352048873901, "learning_rate": 4.0603360027183683e-07, "loss": 0.7739, "step": 24764 }, { "epoch": 0.8747444688263006, "grad_norm": 3.0389420986175537, "learning_rate": 4.058078357052886e-07, "loss": 0.7869, "step": 24765 }, { "epoch": 0.8747797906300085, "grad_norm": 1.84609055519104, "learning_rate": 4.0558213126666467e-07, "loss": 0.7315, "step": 24766 }, { "epoch": 0.8748151124337165, "grad_norm": 1.5723533630371094, "learning_rate": 4.053564869589177e-07, "loss": 0.7462, "step": 24767 }, { "epoch": 0.8748504342374244, "grad_norm": 1.7040523290634155, "learning_rate": 4.051309027850009e-07, "loss": 0.7203, "step": 24768 }, { "epoch": 0.8748857560411323, "grad_norm": 1.9039413928985596, "learning_rate": 4.0490537874786795e-07, "loss": 0.791, "step": 24769 }, { "epoch": 0.8749210778448402, "grad_norm": 1.7202225923538208, "learning_rate": 4.046799148504688e-07, "loss": 0.7544, "step": 24770 }, { "epoch": 0.8749563996485481, "grad_norm": 1.7489160299301147, "learning_rate": 4.044545110957554e-07, "loss": 0.8046, "step": 24771 }, { "epoch": 0.874991721452256, "grad_norm": 1.6916440725326538, "learning_rate": 4.042291674866772e-07, "loss": 0.78, "step": 24772 }, { "epoch": 0.8750270432559639, "grad_norm": 1.465721845626831, "learning_rate": 4.040038840261845e-07, "loss": 0.7443, "step": 24773 }, { "epoch": 0.8750623650596717, "grad_norm": 1.6802374124526978, "learning_rate": 4.037786607172239e-07, "loss": 0.7632, "step": 24774 }, { "epoch": 0.8750976868633796, "grad_norm": 1.7826043367385864, "learning_rate": 4.035534975627459e-07, "loss": 0.772, "step": 24775 }, { "epoch": 0.8751330086670875, "grad_norm": 1.5531023740768433, "learning_rate": 4.033283945656952e-07, "loss": 0.7446, "step": 24776 }, { "epoch": 0.8751683304707955, "grad_norm": 1.5706907510757446, "learning_rate": 4.0310335172901726e-07, "loss": 0.737, "step": 24777 }, { "epoch": 0.8752036522745034, "grad_norm": 1.6262593269348145, "learning_rate": 4.0287836905565925e-07, "loss": 0.7622, "step": 24778 }, { "epoch": 0.8752389740782113, "grad_norm": 1.7316757440567017, "learning_rate": 4.026534465485654e-07, "loss": 0.7866, "step": 24779 }, { "epoch": 0.8752742958819192, "grad_norm": 2.1090354919433594, "learning_rate": 4.024285842106779e-07, "loss": 0.734, "step": 24780 }, { "epoch": 0.8753096176856271, "grad_norm": 1.8953113555908203, "learning_rate": 4.022037820449409e-07, "loss": 0.7376, "step": 24781 }, { "epoch": 0.875344939489335, "grad_norm": 1.5238524675369263, "learning_rate": 4.019790400542972e-07, "loss": 0.7401, "step": 24782 }, { "epoch": 0.8753802612930429, "grad_norm": 1.7331922054290771, "learning_rate": 4.0175435824168776e-07, "loss": 0.801, "step": 24783 }, { "epoch": 0.8754155830967508, "grad_norm": 1.6652839183807373, "learning_rate": 4.0152973661005236e-07, "loss": 0.7339, "step": 24784 }, { "epoch": 0.8754509049004587, "grad_norm": 1.6772302389144897, "learning_rate": 4.013051751623309e-07, "loss": 0.7409, "step": 24785 }, { "epoch": 0.8754862267041666, "grad_norm": 1.6026118993759155, "learning_rate": 4.0108067390146386e-07, "loss": 0.7278, "step": 24786 }, { "epoch": 0.8755215485078746, "grad_norm": 2.040649890899658, "learning_rate": 4.008562328303878e-07, "loss": 0.8029, "step": 24787 }, { "epoch": 0.8755568703115825, "grad_norm": 1.868435263633728, "learning_rate": 4.006318519520408e-07, "loss": 0.7734, "step": 24788 }, { "epoch": 0.8755921921152904, "grad_norm": 1.597093939781189, "learning_rate": 4.0040753126936004e-07, "loss": 0.7629, "step": 24789 }, { "epoch": 0.8756275139189983, "grad_norm": 1.8221091032028198, "learning_rate": 4.001832707852804e-07, "loss": 0.7622, "step": 24790 }, { "epoch": 0.8756628357227062, "grad_norm": 1.6326524019241333, "learning_rate": 3.999590705027373e-07, "loss": 0.7636, "step": 24791 }, { "epoch": 0.8756981575264141, "grad_norm": 1.6927069425582886, "learning_rate": 3.9973493042466615e-07, "loss": 0.7647, "step": 24792 }, { "epoch": 0.875733479330122, "grad_norm": 1.6533317565917969, "learning_rate": 3.9951085055399964e-07, "loss": 0.7388, "step": 24793 }, { "epoch": 0.8757688011338299, "grad_norm": 1.710789442062378, "learning_rate": 3.9928683089366927e-07, "loss": 0.7651, "step": 24794 }, { "epoch": 0.8758041229375378, "grad_norm": 1.6009368896484375, "learning_rate": 3.990628714466077e-07, "loss": 0.7621, "step": 24795 }, { "epoch": 0.8758394447412458, "grad_norm": 1.6769065856933594, "learning_rate": 3.988389722157476e-07, "loss": 0.7578, "step": 24796 }, { "epoch": 0.8758747665449537, "grad_norm": 1.6620981693267822, "learning_rate": 3.9861513320401723e-07, "loss": 0.7801, "step": 24797 }, { "epoch": 0.8759100883486616, "grad_norm": 1.6988505125045776, "learning_rate": 3.9839135441434697e-07, "loss": 0.7543, "step": 24798 }, { "epoch": 0.8759454101523695, "grad_norm": 1.6473755836486816, "learning_rate": 3.9816763584966666e-07, "loss": 0.7773, "step": 24799 }, { "epoch": 0.8759807319560773, "grad_norm": 1.8311488628387451, "learning_rate": 3.979439775129024e-07, "loss": 0.766, "step": 24800 }, { "epoch": 0.8760160537597852, "grad_norm": 1.8880330324172974, "learning_rate": 3.977203794069817e-07, "loss": 0.7713, "step": 24801 }, { "epoch": 0.8760513755634931, "grad_norm": 1.7827731370925903, "learning_rate": 3.9749684153483294e-07, "loss": 0.7789, "step": 24802 }, { "epoch": 0.876086697367201, "grad_norm": 1.6541143655776978, "learning_rate": 3.972733638993798e-07, "loss": 0.7572, "step": 24803 }, { "epoch": 0.8761220191709089, "grad_norm": 1.645836353302002, "learning_rate": 3.970499465035471e-07, "loss": 0.7562, "step": 24804 }, { "epoch": 0.8761573409746168, "grad_norm": 1.7876427173614502, "learning_rate": 3.968265893502604e-07, "loss": 0.7833, "step": 24805 }, { "epoch": 0.8761926627783247, "grad_norm": 1.6469261646270752, "learning_rate": 3.966032924424412e-07, "loss": 0.7472, "step": 24806 }, { "epoch": 0.8762279845820327, "grad_norm": 1.7081611156463623, "learning_rate": 3.9638005578301276e-07, "loss": 0.7658, "step": 24807 }, { "epoch": 0.8762633063857406, "grad_norm": 1.7070770263671875, "learning_rate": 3.9615687937489766e-07, "loss": 0.7581, "step": 24808 }, { "epoch": 0.8762986281894485, "grad_norm": 1.771141529083252, "learning_rate": 3.9593376322101474e-07, "loss": 0.7484, "step": 24809 }, { "epoch": 0.8763339499931564, "grad_norm": 0.9511443376541138, "learning_rate": 3.957107073242855e-07, "loss": 0.5723, "step": 24810 }, { "epoch": 0.8763692717968643, "grad_norm": 1.763135313987732, "learning_rate": 3.954877116876299e-07, "loss": 0.7734, "step": 24811 }, { "epoch": 0.8764045936005722, "grad_norm": 1.646231770515442, "learning_rate": 3.9526477631396554e-07, "loss": 0.7403, "step": 24812 }, { "epoch": 0.8764399154042801, "grad_norm": 1.841217041015625, "learning_rate": 3.9504190120620957e-07, "loss": 0.7677, "step": 24813 }, { "epoch": 0.876475237207988, "grad_norm": 1.6208668947219849, "learning_rate": 3.9481908636727905e-07, "loss": 0.755, "step": 24814 }, { "epoch": 0.8765105590116959, "grad_norm": 1.896393060684204, "learning_rate": 3.9459633180009174e-07, "loss": 0.8014, "step": 24815 }, { "epoch": 0.8765458808154039, "grad_norm": 3.1463019847869873, "learning_rate": 3.943736375075613e-07, "loss": 0.7384, "step": 24816 }, { "epoch": 0.8765812026191118, "grad_norm": 1.5741021633148193, "learning_rate": 3.941510034926027e-07, "loss": 0.7031, "step": 24817 }, { "epoch": 0.8766165244228197, "grad_norm": 1.6151572465896606, "learning_rate": 3.9392842975813086e-07, "loss": 0.7278, "step": 24818 }, { "epoch": 0.8766518462265276, "grad_norm": 1.7918140888214111, "learning_rate": 3.9370591630705723e-07, "loss": 0.7741, "step": 24819 }, { "epoch": 0.8766871680302355, "grad_norm": 1.907326102256775, "learning_rate": 3.9348346314229403e-07, "loss": 0.7694, "step": 24820 }, { "epoch": 0.8767224898339434, "grad_norm": 1.5996828079223633, "learning_rate": 3.9326107026675495e-07, "loss": 0.7322, "step": 24821 }, { "epoch": 0.8767578116376513, "grad_norm": 1.8815574645996094, "learning_rate": 3.9303873768334775e-07, "loss": 0.741, "step": 24822 }, { "epoch": 0.8767931334413592, "grad_norm": 1.6913156509399414, "learning_rate": 3.928164653949834e-07, "loss": 0.7576, "step": 24823 }, { "epoch": 0.8768284552450671, "grad_norm": 1.8022902011871338, "learning_rate": 3.9259425340457234e-07, "loss": 0.7519, "step": 24824 }, { "epoch": 0.876863777048775, "grad_norm": 1.6582790613174438, "learning_rate": 3.923721017150206e-07, "loss": 0.8087, "step": 24825 }, { "epoch": 0.8768990988524829, "grad_norm": 1.7695367336273193, "learning_rate": 3.9215001032923695e-07, "loss": 0.7715, "step": 24826 }, { "epoch": 0.8769344206561908, "grad_norm": 1.715051293373108, "learning_rate": 3.9192797925012795e-07, "loss": 0.7474, "step": 24827 }, { "epoch": 0.8769697424598987, "grad_norm": 1.5848883390426636, "learning_rate": 3.91706008480599e-07, "loss": 0.7315, "step": 24828 }, { "epoch": 0.8770050642636066, "grad_norm": 1.7094658613204956, "learning_rate": 3.9148409802355514e-07, "loss": 0.7841, "step": 24829 }, { "epoch": 0.8770403860673145, "grad_norm": 1.8975989818572998, "learning_rate": 3.9126224788190225e-07, "loss": 0.8141, "step": 24830 }, { "epoch": 0.8770757078710224, "grad_norm": 1.7174468040466309, "learning_rate": 3.9104045805854306e-07, "loss": 0.7719, "step": 24831 }, { "epoch": 0.8771110296747303, "grad_norm": 1.8319562673568726, "learning_rate": 3.908187285563786e-07, "loss": 0.7776, "step": 24832 }, { "epoch": 0.8771463514784382, "grad_norm": 1.7463054656982422, "learning_rate": 3.9059705937831206e-07, "loss": 0.7519, "step": 24833 }, { "epoch": 0.8771816732821461, "grad_norm": 1.8074560165405273, "learning_rate": 3.9037545052724613e-07, "loss": 0.7305, "step": 24834 }, { "epoch": 0.877216995085854, "grad_norm": 1.766350269317627, "learning_rate": 3.901539020060785e-07, "loss": 0.7392, "step": 24835 }, { "epoch": 0.877252316889562, "grad_norm": 1.645024299621582, "learning_rate": 3.8993241381771017e-07, "loss": 0.7602, "step": 24836 }, { "epoch": 0.8772876386932699, "grad_norm": 1.6500072479248047, "learning_rate": 3.8971098596504056e-07, "loss": 0.7714, "step": 24837 }, { "epoch": 0.8773229604969778, "grad_norm": 1.8543649911880493, "learning_rate": 3.8948961845096613e-07, "loss": 0.7603, "step": 24838 }, { "epoch": 0.8773582823006857, "grad_norm": 1.6928596496582031, "learning_rate": 3.8926831127838517e-07, "loss": 0.7683, "step": 24839 }, { "epoch": 0.8773936041043936, "grad_norm": 1.7904502153396606, "learning_rate": 3.8904706445019367e-07, "loss": 0.7859, "step": 24840 }, { "epoch": 0.8774289259081015, "grad_norm": 1.6831387281417847, "learning_rate": 3.888258779692883e-07, "loss": 0.7569, "step": 24841 }, { "epoch": 0.8774642477118094, "grad_norm": 2.057436943054199, "learning_rate": 3.8860475183856214e-07, "loss": 0.7751, "step": 24842 }, { "epoch": 0.8774995695155173, "grad_norm": 1.7968307733535767, "learning_rate": 3.883836860609097e-07, "loss": 0.771, "step": 24843 }, { "epoch": 0.8775348913192252, "grad_norm": 1.7247320413589478, "learning_rate": 3.8816268063922637e-07, "loss": 0.7363, "step": 24844 }, { "epoch": 0.8775702131229332, "grad_norm": 1.6017742156982422, "learning_rate": 3.879417355764015e-07, "loss": 0.7545, "step": 24845 }, { "epoch": 0.8776055349266411, "grad_norm": 1.6743515729904175, "learning_rate": 3.877208508753283e-07, "loss": 0.756, "step": 24846 }, { "epoch": 0.877640856730349, "grad_norm": 1.6230742931365967, "learning_rate": 3.875000265388984e-07, "loss": 0.7514, "step": 24847 }, { "epoch": 0.8776761785340569, "grad_norm": 1.6764228343963623, "learning_rate": 3.872792625700006e-07, "loss": 0.7547, "step": 24848 }, { "epoch": 0.8777115003377648, "grad_norm": 1.550033450126648, "learning_rate": 3.8705855897152533e-07, "loss": 0.7623, "step": 24849 }, { "epoch": 0.8777468221414727, "grad_norm": 1.669027328491211, "learning_rate": 3.8683791574635974e-07, "loss": 0.7514, "step": 24850 }, { "epoch": 0.8777821439451806, "grad_norm": 1.7392301559448242, "learning_rate": 3.8661733289739325e-07, "loss": 0.7475, "step": 24851 }, { "epoch": 0.8778174657488884, "grad_norm": 1.534627914428711, "learning_rate": 3.8639681042751065e-07, "loss": 0.7104, "step": 24852 }, { "epoch": 0.8778527875525963, "grad_norm": 1.638047695159912, "learning_rate": 3.861763483395997e-07, "loss": 0.7624, "step": 24853 }, { "epoch": 0.8778881093563042, "grad_norm": 1.7212421894073486, "learning_rate": 3.859559466365459e-07, "loss": 0.7834, "step": 24854 }, { "epoch": 0.8779234311600121, "grad_norm": 1.8412415981292725, "learning_rate": 3.85735605321233e-07, "loss": 0.7783, "step": 24855 }, { "epoch": 0.8779587529637201, "grad_norm": 1.6150718927383423, "learning_rate": 3.8551532439654483e-07, "loss": 0.7708, "step": 24856 }, { "epoch": 0.877994074767428, "grad_norm": 1.6210432052612305, "learning_rate": 3.852951038653652e-07, "loss": 0.7296, "step": 24857 }, { "epoch": 0.8780293965711359, "grad_norm": 1.776800513267517, "learning_rate": 3.8507494373057505e-07, "loss": 0.7886, "step": 24858 }, { "epoch": 0.8780647183748438, "grad_norm": 1.725074291229248, "learning_rate": 3.8485484399505667e-07, "loss": 0.7878, "step": 24859 }, { "epoch": 0.8781000401785517, "grad_norm": 1.8190152645111084, "learning_rate": 3.846348046616916e-07, "loss": 0.8065, "step": 24860 }, { "epoch": 0.8781353619822596, "grad_norm": 1.610197901725769, "learning_rate": 3.8441482573335744e-07, "loss": 0.7676, "step": 24861 }, { "epoch": 0.8781706837859675, "grad_norm": 1.7014135122299194, "learning_rate": 3.841949072129347e-07, "loss": 0.7354, "step": 24862 }, { "epoch": 0.8782060055896754, "grad_norm": 1.6083248853683472, "learning_rate": 3.839750491033023e-07, "loss": 0.7584, "step": 24863 }, { "epoch": 0.8782413273933833, "grad_norm": 2.306596040725708, "learning_rate": 3.837552514073356e-07, "loss": 0.7687, "step": 24864 }, { "epoch": 0.8782766491970913, "grad_norm": 1.8988453149795532, "learning_rate": 3.8353551412791234e-07, "loss": 0.7729, "step": 24865 }, { "epoch": 0.8783119710007992, "grad_norm": 1.7309961318969727, "learning_rate": 3.833158372679091e-07, "loss": 0.7594, "step": 24866 }, { "epoch": 0.8783472928045071, "grad_norm": 1.768429160118103, "learning_rate": 3.830962208302008e-07, "loss": 0.7644, "step": 24867 }, { "epoch": 0.878382614608215, "grad_norm": 1.8253117799758911, "learning_rate": 3.828766648176602e-07, "loss": 0.7289, "step": 24868 }, { "epoch": 0.8784179364119229, "grad_norm": 1.8098539113998413, "learning_rate": 3.826571692331621e-07, "loss": 0.7621, "step": 24869 }, { "epoch": 0.8784532582156308, "grad_norm": 1.6698589324951172, "learning_rate": 3.8243773407957987e-07, "loss": 0.7715, "step": 24870 }, { "epoch": 0.8784885800193387, "grad_norm": 1.8002761602401733, "learning_rate": 3.8221835935978335e-07, "loss": 0.7381, "step": 24871 }, { "epoch": 0.8785239018230466, "grad_norm": 2.0559937953948975, "learning_rate": 3.819990450766453e-07, "loss": 0.7601, "step": 24872 }, { "epoch": 0.8785592236267545, "grad_norm": 2.093384265899658, "learning_rate": 3.8177979123303624e-07, "loss": 0.7913, "step": 24873 }, { "epoch": 0.8785945454304624, "grad_norm": 1.762073040008545, "learning_rate": 3.815605978318243e-07, "loss": 0.7762, "step": 24874 }, { "epoch": 0.8786298672341704, "grad_norm": 1.9499125480651855, "learning_rate": 3.813414648758795e-07, "loss": 0.7618, "step": 24875 }, { "epoch": 0.8786651890378783, "grad_norm": 1.7022705078125, "learning_rate": 3.811223923680696e-07, "loss": 0.7555, "step": 24876 }, { "epoch": 0.8787005108415862, "grad_norm": 1.6813850402832031, "learning_rate": 3.8090338031126106e-07, "loss": 0.7658, "step": 24877 }, { "epoch": 0.878735832645294, "grad_norm": 1.7471346855163574, "learning_rate": 3.8068442870832057e-07, "loss": 0.7547, "step": 24878 }, { "epoch": 0.8787711544490019, "grad_norm": 0.8917232751846313, "learning_rate": 3.8046553756211523e-07, "loss": 0.5984, "step": 24879 }, { "epoch": 0.8788064762527098, "grad_norm": 1.548413634300232, "learning_rate": 3.802467068755072e-07, "loss": 0.7486, "step": 24880 }, { "epoch": 0.8788417980564177, "grad_norm": 1.6903048753738403, "learning_rate": 3.800279366513626e-07, "loss": 0.7999, "step": 24881 }, { "epoch": 0.8788771198601256, "grad_norm": 1.8033325672149658, "learning_rate": 3.798092268925441e-07, "loss": 0.7746, "step": 24882 }, { "epoch": 0.8789124416638335, "grad_norm": 1.7888851165771484, "learning_rate": 3.795905776019132e-07, "loss": 0.7501, "step": 24883 }, { "epoch": 0.8789477634675414, "grad_norm": 1.786811351776123, "learning_rate": 3.793719887823322e-07, "loss": 0.8018, "step": 24884 }, { "epoch": 0.8789830852712494, "grad_norm": 1.7255457639694214, "learning_rate": 3.791534604366631e-07, "loss": 0.8011, "step": 24885 }, { "epoch": 0.8790184070749573, "grad_norm": 1.678956389427185, "learning_rate": 3.789349925677649e-07, "loss": 0.7942, "step": 24886 }, { "epoch": 0.8790537288786652, "grad_norm": 2.4362120628356934, "learning_rate": 3.787165851784957e-07, "loss": 0.7479, "step": 24887 }, { "epoch": 0.8790890506823731, "grad_norm": 1.7818081378936768, "learning_rate": 3.784982382717156e-07, "loss": 0.7717, "step": 24888 }, { "epoch": 0.879124372486081, "grad_norm": 1.8702841997146606, "learning_rate": 3.782799518502822e-07, "loss": 0.7398, "step": 24889 }, { "epoch": 0.8791596942897889, "grad_norm": 1.621086597442627, "learning_rate": 3.780617259170516e-07, "loss": 0.7483, "step": 24890 }, { "epoch": 0.8791950160934968, "grad_norm": 1.6965630054473877, "learning_rate": 3.778435604748798e-07, "loss": 0.7575, "step": 24891 }, { "epoch": 0.8792303378972047, "grad_norm": 1.7575210332870483, "learning_rate": 3.776254555266234e-07, "loss": 0.7709, "step": 24892 }, { "epoch": 0.8792656597009126, "grad_norm": 1.876998782157898, "learning_rate": 3.774074110751358e-07, "loss": 0.8029, "step": 24893 }, { "epoch": 0.8793009815046205, "grad_norm": 1.554013967514038, "learning_rate": 3.7718942712327065e-07, "loss": 0.7187, "step": 24894 }, { "epoch": 0.8793363033083285, "grad_norm": 1.6183661222457886, "learning_rate": 3.769715036738825e-07, "loss": 0.7687, "step": 24895 }, { "epoch": 0.8793716251120364, "grad_norm": 1.8593342304229736, "learning_rate": 3.767536407298211e-07, "loss": 0.755, "step": 24896 }, { "epoch": 0.8794069469157443, "grad_norm": 2.2007126808166504, "learning_rate": 3.765358382939394e-07, "loss": 0.7483, "step": 24897 }, { "epoch": 0.8794422687194522, "grad_norm": 1.9036365747451782, "learning_rate": 3.7631809636908714e-07, "loss": 0.7566, "step": 24898 }, { "epoch": 0.8794775905231601, "grad_norm": 0.9663906097412109, "learning_rate": 3.7610041495811545e-07, "loss": 0.5826, "step": 24899 }, { "epoch": 0.879512912326868, "grad_norm": 1.5533257722854614, "learning_rate": 3.758827940638715e-07, "loss": 0.7436, "step": 24900 }, { "epoch": 0.8795482341305759, "grad_norm": 2.0539963245391846, "learning_rate": 3.756652336892042e-07, "loss": 0.7405, "step": 24901 }, { "epoch": 0.8795835559342838, "grad_norm": 1.6859532594680786, "learning_rate": 3.7544773383696165e-07, "loss": 0.7512, "step": 24902 }, { "epoch": 0.8796188777379917, "grad_norm": 1.6048824787139893, "learning_rate": 3.752302945099895e-07, "loss": 0.7484, "step": 24903 }, { "epoch": 0.8796541995416995, "grad_norm": 1.6992781162261963, "learning_rate": 3.750129157111343e-07, "loss": 0.7934, "step": 24904 }, { "epoch": 0.8796895213454075, "grad_norm": 1.5794973373413086, "learning_rate": 3.74795597443241e-07, "loss": 0.7562, "step": 24905 }, { "epoch": 0.8797248431491154, "grad_norm": 0.9447949528694153, "learning_rate": 3.745783397091529e-07, "loss": 0.5487, "step": 24906 }, { "epoch": 0.8797601649528233, "grad_norm": 1.9453744888305664, "learning_rate": 3.743611425117133e-07, "loss": 0.7888, "step": 24907 }, { "epoch": 0.8797954867565312, "grad_norm": 1.5742099285125732, "learning_rate": 3.7414400585376597e-07, "loss": 0.7763, "step": 24908 }, { "epoch": 0.8798308085602391, "grad_norm": 1.8512060642242432, "learning_rate": 3.7392692973815315e-07, "loss": 0.7336, "step": 24909 }, { "epoch": 0.879866130363947, "grad_norm": 1.7631698846817017, "learning_rate": 3.737099141677142e-07, "loss": 0.7796, "step": 24910 }, { "epoch": 0.8799014521676549, "grad_norm": 1.9390724897384644, "learning_rate": 3.7349295914529016e-07, "loss": 0.7512, "step": 24911 }, { "epoch": 0.8799367739713628, "grad_norm": 1.7043616771697998, "learning_rate": 3.7327606467372157e-07, "loss": 0.7813, "step": 24912 }, { "epoch": 0.8799720957750707, "grad_norm": 1.674214243888855, "learning_rate": 3.73059230755845e-07, "loss": 0.7587, "step": 24913 }, { "epoch": 0.8800074175787787, "grad_norm": 1.8595423698425293, "learning_rate": 3.728424573944994e-07, "loss": 0.7695, "step": 24914 }, { "epoch": 0.8800427393824866, "grad_norm": 1.723720908164978, "learning_rate": 3.72625744592523e-07, "loss": 0.7704, "step": 24915 }, { "epoch": 0.8800780611861945, "grad_norm": 1.5782716274261475, "learning_rate": 3.724090923527501e-07, "loss": 0.7652, "step": 24916 }, { "epoch": 0.8801133829899024, "grad_norm": 1.7071237564086914, "learning_rate": 3.721925006780175e-07, "loss": 0.734, "step": 24917 }, { "epoch": 0.8801487047936103, "grad_norm": 1.714483380317688, "learning_rate": 3.7197596957115947e-07, "loss": 0.7547, "step": 24918 }, { "epoch": 0.8801840265973182, "grad_norm": 1.6678496599197388, "learning_rate": 3.7175949903500985e-07, "loss": 0.7622, "step": 24919 }, { "epoch": 0.8802193484010261, "grad_norm": 1.7122341394424438, "learning_rate": 3.71543089072402e-07, "loss": 0.7757, "step": 24920 }, { "epoch": 0.880254670204734, "grad_norm": 2.2156803607940674, "learning_rate": 3.7132673968616864e-07, "loss": 0.7465, "step": 24921 }, { "epoch": 0.8802899920084419, "grad_norm": 1.5569781064987183, "learning_rate": 3.7111045087913966e-07, "loss": 0.7605, "step": 24922 }, { "epoch": 0.8803253138121498, "grad_norm": 1.6928244829177856, "learning_rate": 3.7089422265414843e-07, "loss": 0.7473, "step": 24923 }, { "epoch": 0.8803606356158578, "grad_norm": 1.57931387424469, "learning_rate": 3.706780550140221e-07, "loss": 0.7464, "step": 24924 }, { "epoch": 0.8803959574195657, "grad_norm": 1.6119166612625122, "learning_rate": 3.704619479615923e-07, "loss": 0.7659, "step": 24925 }, { "epoch": 0.8804312792232736, "grad_norm": 1.8033581972122192, "learning_rate": 3.702459014996851e-07, "loss": 0.7477, "step": 24926 }, { "epoch": 0.8804666010269815, "grad_norm": 1.821956753730774, "learning_rate": 3.700299156311299e-07, "loss": 0.7912, "step": 24927 }, { "epoch": 0.8805019228306894, "grad_norm": 1.9489079713821411, "learning_rate": 3.698139903587533e-07, "loss": 0.7338, "step": 24928 }, { "epoch": 0.8805372446343973, "grad_norm": 1.7533234357833862, "learning_rate": 3.695981256853798e-07, "loss": 0.7612, "step": 24929 }, { "epoch": 0.8805725664381051, "grad_norm": 1.739694356918335, "learning_rate": 3.6938232161383593e-07, "loss": 0.7543, "step": 24930 }, { "epoch": 0.880607888241813, "grad_norm": 1.6976876258850098, "learning_rate": 3.691665781469461e-07, "loss": 0.7881, "step": 24931 }, { "epoch": 0.8806432100455209, "grad_norm": 1.7354432344436646, "learning_rate": 3.6895089528753316e-07, "loss": 0.7891, "step": 24932 }, { "epoch": 0.8806785318492288, "grad_norm": 1.702802062034607, "learning_rate": 3.6873527303842026e-07, "loss": 0.7775, "step": 24933 }, { "epoch": 0.8807138536529368, "grad_norm": 1.5457199811935425, "learning_rate": 3.6851971140243025e-07, "loss": 0.7774, "step": 24934 }, { "epoch": 0.8807491754566447, "grad_norm": 1.6456818580627441, "learning_rate": 3.683042103823831e-07, "loss": 0.7199, "step": 24935 }, { "epoch": 0.8807844972603526, "grad_norm": 2.0607481002807617, "learning_rate": 3.680887699810998e-07, "loss": 0.7567, "step": 24936 }, { "epoch": 0.8808198190640605, "grad_norm": 1.697020173072815, "learning_rate": 3.6787339020140033e-07, "loss": 0.7668, "step": 24937 }, { "epoch": 0.8808551408677684, "grad_norm": 1.6721802949905396, "learning_rate": 3.676580710461031e-07, "loss": 0.7477, "step": 24938 }, { "epoch": 0.8808904626714763, "grad_norm": 1.975478172302246, "learning_rate": 3.674428125180257e-07, "loss": 0.7583, "step": 24939 }, { "epoch": 0.8809257844751842, "grad_norm": 1.6143394708633423, "learning_rate": 3.672276146199866e-07, "loss": 0.7409, "step": 24940 }, { "epoch": 0.8809611062788921, "grad_norm": 1.7125262022018433, "learning_rate": 3.670124773548017e-07, "loss": 0.7607, "step": 24941 }, { "epoch": 0.8809964280826, "grad_norm": 1.858078122138977, "learning_rate": 3.667974007252861e-07, "loss": 0.7497, "step": 24942 }, { "epoch": 0.881031749886308, "grad_norm": 1.5774195194244385, "learning_rate": 3.6658238473425534e-07, "loss": 0.7451, "step": 24943 }, { "epoch": 0.8810670716900159, "grad_norm": 2.1090786457061768, "learning_rate": 3.6636742938452374e-07, "loss": 0.7767, "step": 24944 }, { "epoch": 0.8811023934937238, "grad_norm": 1.7382004261016846, "learning_rate": 3.6615253467890354e-07, "loss": 0.7419, "step": 24945 }, { "epoch": 0.8811377152974317, "grad_norm": 1.7645057439804077, "learning_rate": 3.659377006202075e-07, "loss": 0.7485, "step": 24946 }, { "epoch": 0.8811730371011396, "grad_norm": 1.692442774772644, "learning_rate": 3.657229272112489e-07, "loss": 0.7429, "step": 24947 }, { "epoch": 0.8812083589048475, "grad_norm": 2.439427614212036, "learning_rate": 3.6550821445483666e-07, "loss": 0.7917, "step": 24948 }, { "epoch": 0.8812436807085554, "grad_norm": 1.5725140571594238, "learning_rate": 3.6529356235378175e-07, "loss": 0.7101, "step": 24949 }, { "epoch": 0.8812790025122633, "grad_norm": 1.8984088897705078, "learning_rate": 3.6507897091089426e-07, "loss": 0.7381, "step": 24950 }, { "epoch": 0.8813143243159712, "grad_norm": 2.436227321624756, "learning_rate": 3.6486444012898137e-07, "loss": 0.758, "step": 24951 }, { "epoch": 0.8813496461196791, "grad_norm": 1.8717358112335205, "learning_rate": 3.646499700108508e-07, "loss": 0.7922, "step": 24952 }, { "epoch": 0.881384967923387, "grad_norm": 1.670966386795044, "learning_rate": 3.644355605593114e-07, "loss": 0.7695, "step": 24953 }, { "epoch": 0.881420289727095, "grad_norm": 1.7801331281661987, "learning_rate": 3.642212117771671e-07, "loss": 0.7961, "step": 24954 }, { "epoch": 0.8814556115308029, "grad_norm": 1.583589792251587, "learning_rate": 3.640069236672239e-07, "loss": 0.7777, "step": 24955 }, { "epoch": 0.8814909333345107, "grad_norm": 1.685589075088501, "learning_rate": 3.6379269623228686e-07, "loss": 0.7493, "step": 24956 }, { "epoch": 0.8815262551382186, "grad_norm": 1.4983288049697876, "learning_rate": 3.635785294751598e-07, "loss": 0.7391, "step": 24957 }, { "epoch": 0.8815615769419265, "grad_norm": 2.170077323913574, "learning_rate": 3.63364423398645e-07, "loss": 0.7471, "step": 24958 }, { "epoch": 0.8815968987456344, "grad_norm": 1.7410361766815186, "learning_rate": 3.631503780055451e-07, "loss": 0.7231, "step": 24959 }, { "epoch": 0.8816322205493423, "grad_norm": 1.658699631690979, "learning_rate": 3.629363932986629e-07, "loss": 0.7624, "step": 24960 }, { "epoch": 0.8816675423530502, "grad_norm": 1.6394617557525635, "learning_rate": 3.6272246928079626e-07, "loss": 0.7377, "step": 24961 }, { "epoch": 0.8817028641567581, "grad_norm": 1.8925551176071167, "learning_rate": 3.625086059547456e-07, "loss": 0.7857, "step": 24962 }, { "epoch": 0.881738185960466, "grad_norm": 1.6305311918258667, "learning_rate": 3.6229480332331147e-07, "loss": 0.7665, "step": 24963 }, { "epoch": 0.881773507764174, "grad_norm": 1.7318662405014038, "learning_rate": 3.620810613892906e-07, "loss": 0.7368, "step": 24964 }, { "epoch": 0.8818088295678819, "grad_norm": 1.8087340593338013, "learning_rate": 3.618673801554812e-07, "loss": 0.7625, "step": 24965 }, { "epoch": 0.8818441513715898, "grad_norm": 1.99953293800354, "learning_rate": 3.6165375962467943e-07, "loss": 0.7278, "step": 24966 }, { "epoch": 0.8818794731752977, "grad_norm": 1.8991703987121582, "learning_rate": 3.614401997996819e-07, "loss": 0.7457, "step": 24967 }, { "epoch": 0.8819147949790056, "grad_norm": 2.0197653770446777, "learning_rate": 3.612267006832826e-07, "loss": 0.7798, "step": 24968 }, { "epoch": 0.8819501167827135, "grad_norm": 1.5484347343444824, "learning_rate": 3.6101326227827584e-07, "loss": 0.7719, "step": 24969 }, { "epoch": 0.8819854385864214, "grad_norm": 2.8015735149383545, "learning_rate": 3.6079988458745663e-07, "loss": 0.7435, "step": 24970 }, { "epoch": 0.8820207603901293, "grad_norm": 1.5717540979385376, "learning_rate": 3.6058656761361556e-07, "loss": 0.7502, "step": 24971 }, { "epoch": 0.8820560821938372, "grad_norm": 1.6318272352218628, "learning_rate": 3.6037331135954535e-07, "loss": 0.7616, "step": 24972 }, { "epoch": 0.8820914039975452, "grad_norm": 1.6373215913772583, "learning_rate": 3.6016011582803766e-07, "loss": 0.7595, "step": 24973 }, { "epoch": 0.8821267258012531, "grad_norm": 1.7064975500106812, "learning_rate": 3.59946981021882e-07, "loss": 0.772, "step": 24974 }, { "epoch": 0.882162047604961, "grad_norm": 1.8329967260360718, "learning_rate": 3.5973390694386765e-07, "loss": 0.7423, "step": 24975 }, { "epoch": 0.8821973694086689, "grad_norm": 1.571366786956787, "learning_rate": 3.595208935967842e-07, "loss": 0.7612, "step": 24976 }, { "epoch": 0.8822326912123768, "grad_norm": 2.0523667335510254, "learning_rate": 3.593079409834188e-07, "loss": 0.761, "step": 24977 }, { "epoch": 0.8822680130160847, "grad_norm": 1.6949164867401123, "learning_rate": 3.5909504910655867e-07, "loss": 0.8044, "step": 24978 }, { "epoch": 0.8823033348197926, "grad_norm": 1.7231197357177734, "learning_rate": 3.588822179689899e-07, "loss": 0.7436, "step": 24979 }, { "epoch": 0.8823386566235005, "grad_norm": 1.7813150882720947, "learning_rate": 3.586694475734992e-07, "loss": 0.7744, "step": 24980 }, { "epoch": 0.8823739784272084, "grad_norm": 1.6363680362701416, "learning_rate": 3.584567379228693e-07, "loss": 0.7746, "step": 24981 }, { "epoch": 0.8824093002309162, "grad_norm": 1.9101991653442383, "learning_rate": 3.582440890198852e-07, "loss": 0.7682, "step": 24982 }, { "epoch": 0.8824446220346241, "grad_norm": 3.045966863632202, "learning_rate": 3.5803150086733016e-07, "loss": 0.7031, "step": 24983 }, { "epoch": 0.8824799438383321, "grad_norm": 3.0904886722564697, "learning_rate": 3.57818973467986e-07, "loss": 0.7959, "step": 24984 }, { "epoch": 0.88251526564204, "grad_norm": 1.724901556968689, "learning_rate": 3.576065068246343e-07, "loss": 0.7204, "step": 24985 }, { "epoch": 0.8825505874457479, "grad_norm": 1.4883348941802979, "learning_rate": 3.5739410094005664e-07, "loss": 0.7474, "step": 24986 }, { "epoch": 0.8825859092494558, "grad_norm": 1.7797287702560425, "learning_rate": 3.571817558170315e-07, "loss": 0.7446, "step": 24987 }, { "epoch": 0.8826212310531637, "grad_norm": 1.8070993423461914, "learning_rate": 3.5696947145833883e-07, "loss": 0.7732, "step": 24988 }, { "epoch": 0.8826565528568716, "grad_norm": 1.6581941843032837, "learning_rate": 3.56757247866758e-07, "loss": 0.7296, "step": 24989 }, { "epoch": 0.8826918746605795, "grad_norm": 1.6555874347686768, "learning_rate": 3.565450850450647e-07, "loss": 0.7561, "step": 24990 }, { "epoch": 0.8827271964642874, "grad_norm": 0.9446160197257996, "learning_rate": 3.5633298299603603e-07, "loss": 0.5808, "step": 24991 }, { "epoch": 0.8827625182679953, "grad_norm": 1.5762624740600586, "learning_rate": 3.561209417224493e-07, "loss": 0.7429, "step": 24992 }, { "epoch": 0.8827978400717033, "grad_norm": 1.8759057521820068, "learning_rate": 3.559089612270783e-07, "loss": 0.7383, "step": 24993 }, { "epoch": 0.8828331618754112, "grad_norm": 1.7793612480163574, "learning_rate": 3.5569704151269703e-07, "loss": 0.8121, "step": 24994 }, { "epoch": 0.8828684836791191, "grad_norm": 1.6455202102661133, "learning_rate": 3.554851825820815e-07, "loss": 0.7357, "step": 24995 }, { "epoch": 0.882903805482827, "grad_norm": 1.6106327772140503, "learning_rate": 3.552733844380013e-07, "loss": 0.7569, "step": 24996 }, { "epoch": 0.8829391272865349, "grad_norm": 1.6491438150405884, "learning_rate": 3.550616470832313e-07, "loss": 0.7836, "step": 24997 }, { "epoch": 0.8829744490902428, "grad_norm": 1.8477628231048584, "learning_rate": 3.5484997052054047e-07, "loss": 0.7954, "step": 24998 }, { "epoch": 0.8830097708939507, "grad_norm": 1.8214356899261475, "learning_rate": 3.546383547527005e-07, "loss": 0.7673, "step": 24999 }, { "epoch": 0.8830450926976586, "grad_norm": 1.7312170267105103, "learning_rate": 3.5442679978247963e-07, "loss": 0.7626, "step": 25000 }, { "epoch": 0.8830804145013665, "grad_norm": 1.705383539199829, "learning_rate": 3.5421530561264794e-07, "loss": 0.7967, "step": 25001 }, { "epoch": 0.8831157363050745, "grad_norm": 1.589597225189209, "learning_rate": 3.5400387224597375e-07, "loss": 0.7579, "step": 25002 }, { "epoch": 0.8831510581087824, "grad_norm": 1.7334684133529663, "learning_rate": 3.5379249968522266e-07, "loss": 0.7327, "step": 25003 }, { "epoch": 0.8831863799124903, "grad_norm": 1.6319994926452637, "learning_rate": 3.535811879331619e-07, "loss": 0.7615, "step": 25004 }, { "epoch": 0.8832217017161982, "grad_norm": 1.5147439241409302, "learning_rate": 3.533699369925575e-07, "loss": 0.7751, "step": 25005 }, { "epoch": 0.8832570235199061, "grad_norm": 1.6485017538070679, "learning_rate": 3.53158746866174e-07, "loss": 0.751, "step": 25006 }, { "epoch": 0.883292345323614, "grad_norm": 1.6447569131851196, "learning_rate": 3.5294761755677475e-07, "loss": 0.7948, "step": 25007 }, { "epoch": 0.8833276671273218, "grad_norm": 2.8563737869262695, "learning_rate": 3.527365490671242e-07, "loss": 0.756, "step": 25008 }, { "epoch": 0.8833629889310297, "grad_norm": 1.550367832183838, "learning_rate": 3.5252554139998343e-07, "loss": 0.7531, "step": 25009 }, { "epoch": 0.8833983107347376, "grad_norm": 1.8040046691894531, "learning_rate": 3.5231459455811525e-07, "loss": 0.8173, "step": 25010 }, { "epoch": 0.8834336325384455, "grad_norm": 0.8810243010520935, "learning_rate": 3.521037085442802e-07, "loss": 0.5687, "step": 25011 }, { "epoch": 0.8834689543421534, "grad_norm": 1.598785400390625, "learning_rate": 3.518928833612373e-07, "loss": 0.7509, "step": 25012 }, { "epoch": 0.8835042761458614, "grad_norm": 2.017537832260132, "learning_rate": 3.5168211901174696e-07, "loss": 0.7574, "step": 25013 }, { "epoch": 0.8835395979495693, "grad_norm": 1.597914457321167, "learning_rate": 3.514714154985677e-07, "loss": 0.74, "step": 25014 }, { "epoch": 0.8835749197532772, "grad_norm": 1.6139944791793823, "learning_rate": 3.512607728244571e-07, "loss": 0.7422, "step": 25015 }, { "epoch": 0.8836102415569851, "grad_norm": 1.8614768981933594, "learning_rate": 3.510501909921704e-07, "loss": 0.7932, "step": 25016 }, { "epoch": 0.883645563360693, "grad_norm": 1.6234161853790283, "learning_rate": 3.508396700044653e-07, "loss": 0.7557, "step": 25017 }, { "epoch": 0.8836808851644009, "grad_norm": 1.6710554361343384, "learning_rate": 3.506292098640973e-07, "loss": 0.7543, "step": 25018 }, { "epoch": 0.8837162069681088, "grad_norm": 1.737463355064392, "learning_rate": 3.504188105738193e-07, "loss": 0.7904, "step": 25019 }, { "epoch": 0.8837515287718167, "grad_norm": 3.0638744831085205, "learning_rate": 3.502084721363858e-07, "loss": 0.766, "step": 25020 }, { "epoch": 0.8837868505755246, "grad_norm": 2.077052354812622, "learning_rate": 3.499981945545511e-07, "loss": 0.767, "step": 25021 }, { "epoch": 0.8838221723792326, "grad_norm": 1.610351324081421, "learning_rate": 3.4978797783106424e-07, "loss": 0.7553, "step": 25022 }, { "epoch": 0.8838574941829405, "grad_norm": 1.5382598638534546, "learning_rate": 3.495778219686791e-07, "loss": 0.7405, "step": 25023 }, { "epoch": 0.8838928159866484, "grad_norm": 1.6963642835617065, "learning_rate": 3.4936772697014566e-07, "loss": 0.7784, "step": 25024 }, { "epoch": 0.8839281377903563, "grad_norm": 1.9476795196533203, "learning_rate": 3.4915769283821235e-07, "loss": 0.7449, "step": 25025 }, { "epoch": 0.8839634595940642, "grad_norm": 1.8673394918441772, "learning_rate": 3.4894771957562855e-07, "loss": 0.7611, "step": 25026 }, { "epoch": 0.8839987813977721, "grad_norm": 1.8091541528701782, "learning_rate": 3.4873780718514327e-07, "loss": 0.7788, "step": 25027 }, { "epoch": 0.88403410320148, "grad_norm": 1.7519644498825073, "learning_rate": 3.485279556695037e-07, "loss": 0.8062, "step": 25028 }, { "epoch": 0.8840694250051879, "grad_norm": 1.67307710647583, "learning_rate": 3.483181650314554e-07, "loss": 0.7733, "step": 25029 }, { "epoch": 0.8841047468088958, "grad_norm": 2.0213334560394287, "learning_rate": 3.4810843527374405e-07, "loss": 0.7546, "step": 25030 }, { "epoch": 0.8841400686126037, "grad_norm": 1.7331780195236206, "learning_rate": 3.478987663991162e-07, "loss": 0.7384, "step": 25031 }, { "epoch": 0.8841753904163117, "grad_norm": 1.6519696712493896, "learning_rate": 3.476891584103137e-07, "loss": 0.785, "step": 25032 }, { "epoch": 0.8842107122200196, "grad_norm": 1.7261642217636108, "learning_rate": 3.4747961131008145e-07, "loss": 0.7392, "step": 25033 }, { "epoch": 0.8842460340237274, "grad_norm": 1.7598949670791626, "learning_rate": 3.472701251011629e-07, "loss": 0.7797, "step": 25034 }, { "epoch": 0.8842813558274353, "grad_norm": 1.724773645401001, "learning_rate": 3.470606997862963e-07, "loss": 0.74, "step": 25035 }, { "epoch": 0.8843166776311432, "grad_norm": 1.7711845636367798, "learning_rate": 3.468513353682251e-07, "loss": 0.7685, "step": 25036 }, { "epoch": 0.8843519994348511, "grad_norm": 2.2901968955993652, "learning_rate": 3.4664203184968823e-07, "loss": 0.7648, "step": 25037 }, { "epoch": 0.884387321238559, "grad_norm": 1.7107888460159302, "learning_rate": 3.4643278923342737e-07, "loss": 0.7487, "step": 25038 }, { "epoch": 0.8844226430422669, "grad_norm": 2.2298786640167236, "learning_rate": 3.4622360752217807e-07, "loss": 0.764, "step": 25039 }, { "epoch": 0.8844579648459748, "grad_norm": 1.8227424621582031, "learning_rate": 3.4601448671867985e-07, "loss": 0.7678, "step": 25040 }, { "epoch": 0.8844932866496827, "grad_norm": 1.6180822849273682, "learning_rate": 3.4580542682566943e-07, "loss": 0.77, "step": 25041 }, { "epoch": 0.8845286084533907, "grad_norm": 1.7939984798431396, "learning_rate": 3.4559642784588234e-07, "loss": 0.7907, "step": 25042 }, { "epoch": 0.8845639302570986, "grad_norm": 2.063593626022339, "learning_rate": 3.453874897820536e-07, "loss": 0.807, "step": 25043 }, { "epoch": 0.8845992520608065, "grad_norm": 1.8356664180755615, "learning_rate": 3.4517861263692e-07, "loss": 0.7444, "step": 25044 }, { "epoch": 0.8846345738645144, "grad_norm": 1.5570738315582275, "learning_rate": 3.449697964132126e-07, "loss": 0.7755, "step": 25045 }, { "epoch": 0.8846698956682223, "grad_norm": 1.6575775146484375, "learning_rate": 3.4476104111366535e-07, "loss": 0.7738, "step": 25046 }, { "epoch": 0.8847052174719302, "grad_norm": 1.854383945465088, "learning_rate": 3.4455234674101114e-07, "loss": 0.8025, "step": 25047 }, { "epoch": 0.8847405392756381, "grad_norm": 1.6086552143096924, "learning_rate": 3.4434371329798047e-07, "loss": 0.7345, "step": 25048 }, { "epoch": 0.884775861079346, "grad_norm": 3.1942687034606934, "learning_rate": 3.441351407873039e-07, "loss": 0.745, "step": 25049 }, { "epoch": 0.8848111828830539, "grad_norm": 1.7233222723007202, "learning_rate": 3.4392662921171216e-07, "loss": 0.7632, "step": 25050 }, { "epoch": 0.8848465046867618, "grad_norm": 1.6476678848266602, "learning_rate": 3.437181785739324e-07, "loss": 0.7718, "step": 25051 }, { "epoch": 0.8848818264904698, "grad_norm": 1.716269612312317, "learning_rate": 3.435097888766947e-07, "loss": 0.7992, "step": 25052 }, { "epoch": 0.8849171482941777, "grad_norm": 1.6694352626800537, "learning_rate": 3.433014601227247e-07, "loss": 0.7869, "step": 25053 }, { "epoch": 0.8849524700978856, "grad_norm": 1.8179421424865723, "learning_rate": 3.430931923147512e-07, "loss": 0.7246, "step": 25054 }, { "epoch": 0.8849877919015935, "grad_norm": 1.8289682865142822, "learning_rate": 3.428849854554972e-07, "loss": 0.7641, "step": 25055 }, { "epoch": 0.8850231137053014, "grad_norm": 1.6144211292266846, "learning_rate": 3.4267683954768924e-07, "loss": 0.7554, "step": 25056 }, { "epoch": 0.8850584355090093, "grad_norm": 1.7903895378112793, "learning_rate": 3.4246875459405195e-07, "loss": 0.7815, "step": 25057 }, { "epoch": 0.8850937573127172, "grad_norm": 2.3515374660491943, "learning_rate": 3.4226073059730746e-07, "loss": 0.7943, "step": 25058 }, { "epoch": 0.8851290791164251, "grad_norm": 1.666718602180481, "learning_rate": 3.4205276756017926e-07, "loss": 0.7506, "step": 25059 }, { "epoch": 0.8851644009201329, "grad_norm": 1.7861512899398804, "learning_rate": 3.4184486548538955e-07, "loss": 0.7741, "step": 25060 }, { "epoch": 0.8851997227238408, "grad_norm": 1.6432384252548218, "learning_rate": 3.4163702437565724e-07, "loss": 0.7663, "step": 25061 }, { "epoch": 0.8852350445275488, "grad_norm": 1.6717387437820435, "learning_rate": 3.4142924423370414e-07, "loss": 0.7622, "step": 25062 }, { "epoch": 0.8852703663312567, "grad_norm": 1.510819673538208, "learning_rate": 3.4122152506225026e-07, "loss": 0.77, "step": 25063 }, { "epoch": 0.8853056881349646, "grad_norm": 1.6392059326171875, "learning_rate": 3.4101386686401284e-07, "loss": 0.7492, "step": 25064 }, { "epoch": 0.8853410099386725, "grad_norm": 1.664377212524414, "learning_rate": 3.408062696417097e-07, "loss": 0.7507, "step": 25065 }, { "epoch": 0.8853763317423804, "grad_norm": 2.492854118347168, "learning_rate": 3.4059873339805924e-07, "loss": 0.7502, "step": 25066 }, { "epoch": 0.8854116535460883, "grad_norm": 1.5727660655975342, "learning_rate": 3.40391258135776e-07, "loss": 0.7618, "step": 25067 }, { "epoch": 0.8854469753497962, "grad_norm": 1.8796944618225098, "learning_rate": 3.4018384385757553e-07, "loss": 0.78, "step": 25068 }, { "epoch": 0.8854822971535041, "grad_norm": 1.7341479063034058, "learning_rate": 3.3997649056617453e-07, "loss": 0.743, "step": 25069 }, { "epoch": 0.885517618957212, "grad_norm": 1.6862523555755615, "learning_rate": 3.397691982642848e-07, "loss": 0.7944, "step": 25070 }, { "epoch": 0.88555294076092, "grad_norm": 7.5325775146484375, "learning_rate": 3.395619669546191e-07, "loss": 0.7694, "step": 25071 }, { "epoch": 0.8855882625646279, "grad_norm": 1.9170420169830322, "learning_rate": 3.3935479663989026e-07, "loss": 0.7867, "step": 25072 }, { "epoch": 0.8856235843683358, "grad_norm": 1.6118263006210327, "learning_rate": 3.3914768732281e-07, "loss": 0.7853, "step": 25073 }, { "epoch": 0.8856589061720437, "grad_norm": 1.6904606819152832, "learning_rate": 3.389406390060884e-07, "loss": 0.7561, "step": 25074 }, { "epoch": 0.8856942279757516, "grad_norm": 1.891086220741272, "learning_rate": 3.3873365169243554e-07, "loss": 0.7875, "step": 25075 }, { "epoch": 0.8857295497794595, "grad_norm": 1.6574971675872803, "learning_rate": 3.3852672538456146e-07, "loss": 0.7809, "step": 25076 }, { "epoch": 0.8857648715831674, "grad_norm": 1.71002197265625, "learning_rate": 3.383198600851723e-07, "loss": 0.7833, "step": 25077 }, { "epoch": 0.8858001933868753, "grad_norm": 2.003098487854004, "learning_rate": 3.381130557969764e-07, "loss": 0.7986, "step": 25078 }, { "epoch": 0.8858355151905832, "grad_norm": 1.705399513244629, "learning_rate": 3.379063125226811e-07, "loss": 0.7449, "step": 25079 }, { "epoch": 0.8858708369942911, "grad_norm": 1.5370742082595825, "learning_rate": 3.3769963026499155e-07, "loss": 0.7333, "step": 25080 }, { "epoch": 0.885906158797999, "grad_norm": 2.318164348602295, "learning_rate": 3.374930090266121e-07, "loss": 0.7464, "step": 25081 }, { "epoch": 0.885941480601707, "grad_norm": 1.727721095085144, "learning_rate": 3.3728644881024896e-07, "loss": 0.7613, "step": 25082 }, { "epoch": 0.8859768024054149, "grad_norm": 1.612519383430481, "learning_rate": 3.370799496186028e-07, "loss": 0.7651, "step": 25083 }, { "epoch": 0.8860121242091228, "grad_norm": 1.6393765211105347, "learning_rate": 3.3687351145437865e-07, "loss": 0.7771, "step": 25084 }, { "epoch": 0.8860474460128307, "grad_norm": 1.648168683052063, "learning_rate": 3.3666713432027654e-07, "loss": 0.7683, "step": 25085 }, { "epoch": 0.8860827678165385, "grad_norm": 1.6617375612258911, "learning_rate": 3.364608182189999e-07, "loss": 0.7504, "step": 25086 }, { "epoch": 0.8861180896202464, "grad_norm": 1.5408625602722168, "learning_rate": 3.36254563153246e-07, "loss": 0.7201, "step": 25087 }, { "epoch": 0.8861534114239543, "grad_norm": 1.7391457557678223, "learning_rate": 3.3604836912571606e-07, "loss": 0.7606, "step": 25088 }, { "epoch": 0.8861887332276622, "grad_norm": 2.2307586669921875, "learning_rate": 3.3584223613911005e-07, "loss": 0.7155, "step": 25089 }, { "epoch": 0.8862240550313701, "grad_norm": 1.994593620300293, "learning_rate": 3.3563616419612197e-07, "loss": 0.766, "step": 25090 }, { "epoch": 0.886259376835078, "grad_norm": 1.6177841424942017, "learning_rate": 3.354301532994514e-07, "loss": 0.7951, "step": 25091 }, { "epoch": 0.886294698638786, "grad_norm": 1.6131625175476074, "learning_rate": 3.3522420345179494e-07, "loss": 0.7536, "step": 25092 }, { "epoch": 0.8863300204424939, "grad_norm": 1.725305438041687, "learning_rate": 3.350183146558461e-07, "loss": 0.7404, "step": 25093 }, { "epoch": 0.8863653422462018, "grad_norm": 1.5906835794448853, "learning_rate": 3.34812486914301e-07, "loss": 0.7668, "step": 25094 }, { "epoch": 0.8864006640499097, "grad_norm": 1.7954610586166382, "learning_rate": 3.346067202298531e-07, "loss": 0.7478, "step": 25095 }, { "epoch": 0.8864359858536176, "grad_norm": 1.9686365127563477, "learning_rate": 3.3440101460519567e-07, "loss": 0.7668, "step": 25096 }, { "epoch": 0.8864713076573255, "grad_norm": 2.0813522338867188, "learning_rate": 3.341953700430206e-07, "loss": 0.7405, "step": 25097 }, { "epoch": 0.8865066294610334, "grad_norm": 1.5265306234359741, "learning_rate": 3.33989786546019e-07, "loss": 0.7673, "step": 25098 }, { "epoch": 0.8865419512647413, "grad_norm": 1.6871562004089355, "learning_rate": 3.3378426411688316e-07, "loss": 0.7532, "step": 25099 }, { "epoch": 0.8865772730684492, "grad_norm": 1.7617164850234985, "learning_rate": 3.335788027583009e-07, "loss": 0.7935, "step": 25100 }, { "epoch": 0.8866125948721572, "grad_norm": 5.5314836502075195, "learning_rate": 3.333734024729618e-07, "loss": 0.7918, "step": 25101 }, { "epoch": 0.8866479166758651, "grad_norm": 1.738923192024231, "learning_rate": 3.3316806326355533e-07, "loss": 0.7883, "step": 25102 }, { "epoch": 0.886683238479573, "grad_norm": 1.8780838251113892, "learning_rate": 3.3296278513276715e-07, "loss": 0.7718, "step": 25103 }, { "epoch": 0.8867185602832809, "grad_norm": 1.9610440731048584, "learning_rate": 3.327575680832845e-07, "loss": 0.7943, "step": 25104 }, { "epoch": 0.8867538820869888, "grad_norm": 1.88761305809021, "learning_rate": 3.3255241211779477e-07, "loss": 0.7734, "step": 25105 }, { "epoch": 0.8867892038906967, "grad_norm": 1.795115351676941, "learning_rate": 3.323473172389807e-07, "loss": 0.7403, "step": 25106 }, { "epoch": 0.8868245256944046, "grad_norm": 1.851279616355896, "learning_rate": 3.3214228344952804e-07, "loss": 0.8354, "step": 25107 }, { "epoch": 0.8868598474981125, "grad_norm": 2.1476168632507324, "learning_rate": 3.31937310752119e-07, "loss": 0.7592, "step": 25108 }, { "epoch": 0.8868951693018204, "grad_norm": 1.5559625625610352, "learning_rate": 3.317323991494381e-07, "loss": 0.7511, "step": 25109 }, { "epoch": 0.8869304911055284, "grad_norm": 1.6148793697357178, "learning_rate": 3.315275486441649e-07, "loss": 0.7861, "step": 25110 }, { "epoch": 0.8869658129092363, "grad_norm": 2.386364459991455, "learning_rate": 3.3132275923898117e-07, "loss": 0.7864, "step": 25111 }, { "epoch": 0.8870011347129441, "grad_norm": 2.0166263580322266, "learning_rate": 3.311180309365691e-07, "loss": 0.7549, "step": 25112 }, { "epoch": 0.887036456516652, "grad_norm": 1.5862113237380981, "learning_rate": 3.3091336373960557e-07, "loss": 0.7329, "step": 25113 }, { "epoch": 0.8870717783203599, "grad_norm": 1.6832029819488525, "learning_rate": 3.3070875765077003e-07, "loss": 0.756, "step": 25114 }, { "epoch": 0.8871071001240678, "grad_norm": 1.5868481397628784, "learning_rate": 3.3050421267274145e-07, "loss": 0.7693, "step": 25115 }, { "epoch": 0.8871424219277757, "grad_norm": 1.6688613891601562, "learning_rate": 3.3029972880819496e-07, "loss": 0.74, "step": 25116 }, { "epoch": 0.8871777437314836, "grad_norm": 1.896844506263733, "learning_rate": 3.3009530605980835e-07, "loss": 0.7729, "step": 25117 }, { "epoch": 0.8872130655351915, "grad_norm": 1.7492181062698364, "learning_rate": 3.298909444302567e-07, "loss": 0.7489, "step": 25118 }, { "epoch": 0.8872483873388994, "grad_norm": 1.608871340751648, "learning_rate": 3.2968664392221404e-07, "loss": 0.7283, "step": 25119 }, { "epoch": 0.8872837091426073, "grad_norm": 1.6533761024475098, "learning_rate": 3.294824045383549e-07, "loss": 0.7504, "step": 25120 }, { "epoch": 0.8873190309463153, "grad_norm": 1.8098970651626587, "learning_rate": 3.2927822628135265e-07, "loss": 0.7722, "step": 25121 }, { "epoch": 0.8873543527500232, "grad_norm": 2.0063765048980713, "learning_rate": 3.2907410915387793e-07, "loss": 0.783, "step": 25122 }, { "epoch": 0.8873896745537311, "grad_norm": 1.9568825960159302, "learning_rate": 3.288700531586037e-07, "loss": 0.7771, "step": 25123 }, { "epoch": 0.887424996357439, "grad_norm": 1.5114628076553345, "learning_rate": 3.286660582982004e-07, "loss": 0.7591, "step": 25124 }, { "epoch": 0.8874603181611469, "grad_norm": 1.7289328575134277, "learning_rate": 3.284621245753372e-07, "loss": 0.753, "step": 25125 }, { "epoch": 0.8874956399648548, "grad_norm": 1.7182332277297974, "learning_rate": 3.2825825199268414e-07, "loss": 0.7784, "step": 25126 }, { "epoch": 0.8875309617685627, "grad_norm": 1.7019587755203247, "learning_rate": 3.280544405529079e-07, "loss": 0.7634, "step": 25127 }, { "epoch": 0.8875662835722706, "grad_norm": 1.7672260999679565, "learning_rate": 3.278506902586781e-07, "loss": 0.7399, "step": 25128 }, { "epoch": 0.8876016053759785, "grad_norm": 1.6667773723602295, "learning_rate": 3.276470011126592e-07, "loss": 0.7307, "step": 25129 }, { "epoch": 0.8876369271796865, "grad_norm": 1.6469624042510986, "learning_rate": 3.274433731175186e-07, "loss": 0.7567, "step": 25130 }, { "epoch": 0.8876722489833944, "grad_norm": 1.5956448316574097, "learning_rate": 3.2723980627592077e-07, "loss": 0.7135, "step": 25131 }, { "epoch": 0.8877075707871023, "grad_norm": 1.8783730268478394, "learning_rate": 3.270363005905297e-07, "loss": 0.7716, "step": 25132 }, { "epoch": 0.8877428925908102, "grad_norm": 2.405362844467163, "learning_rate": 3.268328560640088e-07, "loss": 0.7584, "step": 25133 }, { "epoch": 0.8877782143945181, "grad_norm": 1.6650052070617676, "learning_rate": 3.2662947269902214e-07, "loss": 0.7228, "step": 25134 }, { "epoch": 0.887813536198226, "grad_norm": 1.6648441553115845, "learning_rate": 3.264261504982291e-07, "loss": 0.7848, "step": 25135 }, { "epoch": 0.8878488580019339, "grad_norm": 1.7213791608810425, "learning_rate": 3.262228894642927e-07, "loss": 0.7797, "step": 25136 }, { "epoch": 0.8878841798056418, "grad_norm": 1.543666124343872, "learning_rate": 3.2601968959987296e-07, "loss": 0.7361, "step": 25137 }, { "epoch": 0.8879195016093496, "grad_norm": 1.664819359779358, "learning_rate": 3.2581655090762833e-07, "loss": 0.7851, "step": 25138 }, { "epoch": 0.8879548234130575, "grad_norm": 1.6211825609207153, "learning_rate": 3.2561347339021777e-07, "loss": 0.7615, "step": 25139 }, { "epoch": 0.8879901452167654, "grad_norm": 1.6093119382858276, "learning_rate": 3.254104570503003e-07, "loss": 0.7513, "step": 25140 }, { "epoch": 0.8880254670204734, "grad_norm": 1.8088880777359009, "learning_rate": 3.252075018905315e-07, "loss": 0.7506, "step": 25141 }, { "epoch": 0.8880607888241813, "grad_norm": 1.7020426988601685, "learning_rate": 3.250046079135677e-07, "loss": 0.7825, "step": 25142 }, { "epoch": 0.8880961106278892, "grad_norm": 1.6666865348815918, "learning_rate": 3.24801775122065e-07, "loss": 0.7856, "step": 25143 }, { "epoch": 0.8881314324315971, "grad_norm": 1.7922734022140503, "learning_rate": 3.245990035186797e-07, "loss": 0.7612, "step": 25144 }, { "epoch": 0.888166754235305, "grad_norm": 1.5979857444763184, "learning_rate": 3.243962931060618e-07, "loss": 0.7466, "step": 25145 }, { "epoch": 0.8882020760390129, "grad_norm": 1.6210296154022217, "learning_rate": 3.241936438868665e-07, "loss": 0.7427, "step": 25146 }, { "epoch": 0.8882373978427208, "grad_norm": 1.5560413599014282, "learning_rate": 3.239910558637466e-07, "loss": 0.7404, "step": 25147 }, { "epoch": 0.8882727196464287, "grad_norm": 2.0683248043060303, "learning_rate": 3.2378852903935164e-07, "loss": 0.8126, "step": 25148 }, { "epoch": 0.8883080414501366, "grad_norm": 1.688315749168396, "learning_rate": 3.23586063416334e-07, "loss": 0.7564, "step": 25149 }, { "epoch": 0.8883433632538446, "grad_norm": 1.6580348014831543, "learning_rate": 3.2338365899734324e-07, "loss": 0.7523, "step": 25150 }, { "epoch": 0.8883786850575525, "grad_norm": 1.7354824542999268, "learning_rate": 3.231813157850272e-07, "loss": 0.7925, "step": 25151 }, { "epoch": 0.8884140068612604, "grad_norm": 2.168050765991211, "learning_rate": 3.2297903378203543e-07, "loss": 0.802, "step": 25152 }, { "epoch": 0.8884493286649683, "grad_norm": 1.7056299448013306, "learning_rate": 3.2277681299101416e-07, "loss": 0.7555, "step": 25153 }, { "epoch": 0.8884846504686762, "grad_norm": 1.7947129011154175, "learning_rate": 3.2257465341461183e-07, "loss": 0.7468, "step": 25154 }, { "epoch": 0.8885199722723841, "grad_norm": 1.7703166007995605, "learning_rate": 3.2237255505547293e-07, "loss": 0.7359, "step": 25155 }, { "epoch": 0.888555294076092, "grad_norm": 1.618135690689087, "learning_rate": 3.221705179162421e-07, "loss": 0.7601, "step": 25156 }, { "epoch": 0.8885906158797999, "grad_norm": 2.891934871673584, "learning_rate": 3.2196854199956496e-07, "loss": 0.7409, "step": 25157 }, { "epoch": 0.8886259376835078, "grad_norm": 1.964207649230957, "learning_rate": 3.217666273080838e-07, "loss": 0.7317, "step": 25158 }, { "epoch": 0.8886612594872157, "grad_norm": 1.7533390522003174, "learning_rate": 3.2156477384444106e-07, "loss": 0.7695, "step": 25159 }, { "epoch": 0.8886965812909237, "grad_norm": 1.5954502820968628, "learning_rate": 3.213629816112807e-07, "loss": 0.7629, "step": 25160 }, { "epoch": 0.8887319030946316, "grad_norm": 1.636685848236084, "learning_rate": 3.211612506112405e-07, "loss": 0.7191, "step": 25161 }, { "epoch": 0.8887672248983395, "grad_norm": 1.9101653099060059, "learning_rate": 3.209595808469629e-07, "loss": 0.7333, "step": 25162 }, { "epoch": 0.8888025467020474, "grad_norm": 1.6476458311080933, "learning_rate": 3.207579723210874e-07, "loss": 0.7553, "step": 25163 }, { "epoch": 0.8888378685057552, "grad_norm": 1.6912086009979248, "learning_rate": 3.2055642503625196e-07, "loss": 0.7926, "step": 25164 }, { "epoch": 0.8888731903094631, "grad_norm": 1.7781991958618164, "learning_rate": 3.203549389950933e-07, "loss": 0.7795, "step": 25165 }, { "epoch": 0.888908512113171, "grad_norm": 1.7265894412994385, "learning_rate": 3.2015351420024987e-07, "loss": 0.7857, "step": 25166 }, { "epoch": 0.8889438339168789, "grad_norm": 1.705973505973816, "learning_rate": 3.199521506543585e-07, "loss": 0.7648, "step": 25167 }, { "epoch": 0.8889791557205868, "grad_norm": 2.405853509902954, "learning_rate": 3.197508483600525e-07, "loss": 0.7549, "step": 25168 }, { "epoch": 0.8890144775242947, "grad_norm": 1.6470856666564941, "learning_rate": 3.195496073199678e-07, "loss": 0.7878, "step": 25169 }, { "epoch": 0.8890497993280027, "grad_norm": 1.7350281476974487, "learning_rate": 3.193484275367381e-07, "loss": 0.7254, "step": 25170 }, { "epoch": 0.8890851211317106, "grad_norm": 1.784574270248413, "learning_rate": 3.1914730901299653e-07, "loss": 0.774, "step": 25171 }, { "epoch": 0.8891204429354185, "grad_norm": 1.8498783111572266, "learning_rate": 3.1894625175137475e-07, "loss": 0.789, "step": 25172 }, { "epoch": 0.8891557647391264, "grad_norm": 1.6519259214401245, "learning_rate": 3.187452557545051e-07, "loss": 0.7733, "step": 25173 }, { "epoch": 0.8891910865428343, "grad_norm": 1.7625821828842163, "learning_rate": 3.1854432102501664e-07, "loss": 0.7464, "step": 25174 }, { "epoch": 0.8892264083465422, "grad_norm": 1.8331615924835205, "learning_rate": 3.1834344756554e-07, "loss": 0.7624, "step": 25175 }, { "epoch": 0.8892617301502501, "grad_norm": 1.5893052816390991, "learning_rate": 3.1814263537870535e-07, "loss": 0.7536, "step": 25176 }, { "epoch": 0.889297051953958, "grad_norm": 1.7948824167251587, "learning_rate": 3.1794188446713947e-07, "loss": 0.7454, "step": 25177 }, { "epoch": 0.8893323737576659, "grad_norm": 1.611128330230713, "learning_rate": 3.1774119483346965e-07, "loss": 0.7539, "step": 25178 }, { "epoch": 0.8893676955613738, "grad_norm": 1.7457633018493652, "learning_rate": 3.175405664803233e-07, "loss": 0.7455, "step": 25179 }, { "epoch": 0.8894030173650818, "grad_norm": 1.718245029449463, "learning_rate": 3.173399994103255e-07, "loss": 0.7647, "step": 25180 }, { "epoch": 0.8894383391687897, "grad_norm": 1.603921890258789, "learning_rate": 3.171394936261019e-07, "loss": 0.7473, "step": 25181 }, { "epoch": 0.8894736609724976, "grad_norm": 1.6896666288375854, "learning_rate": 3.1693904913027597e-07, "loss": 0.7842, "step": 25182 }, { "epoch": 0.8895089827762055, "grad_norm": 1.7142407894134521, "learning_rate": 3.167386659254723e-07, "loss": 0.7482, "step": 25183 }, { "epoch": 0.8895443045799134, "grad_norm": 1.5641281604766846, "learning_rate": 3.165383440143116e-07, "loss": 0.7037, "step": 25184 }, { "epoch": 0.8895796263836213, "grad_norm": 1.5387418270111084, "learning_rate": 3.1633808339941677e-07, "loss": 0.7427, "step": 25185 }, { "epoch": 0.8896149481873292, "grad_norm": 1.6376358270645142, "learning_rate": 3.1613788408340895e-07, "loss": 0.7567, "step": 25186 }, { "epoch": 0.8896502699910371, "grad_norm": 1.798859715461731, "learning_rate": 3.159377460689078e-07, "loss": 0.7493, "step": 25187 }, { "epoch": 0.889685591794745, "grad_norm": 1.822168231010437, "learning_rate": 3.157376693585329e-07, "loss": 0.7842, "step": 25188 }, { "epoch": 0.889720913598453, "grad_norm": 3.78690505027771, "learning_rate": 3.1553765395490375e-07, "loss": 0.7453, "step": 25189 }, { "epoch": 0.8897562354021609, "grad_norm": 1.9331514835357666, "learning_rate": 3.153376998606361e-07, "loss": 0.7367, "step": 25190 }, { "epoch": 0.8897915572058687, "grad_norm": 3.0433297157287598, "learning_rate": 3.1513780707834784e-07, "loss": 0.7367, "step": 25191 }, { "epoch": 0.8898268790095766, "grad_norm": 1.76140558719635, "learning_rate": 3.1493797561065633e-07, "loss": 0.7522, "step": 25192 }, { "epoch": 0.8898622008132845, "grad_norm": 1.8372644186019897, "learning_rate": 3.1473820546017497e-07, "loss": 0.7474, "step": 25193 }, { "epoch": 0.8898975226169924, "grad_norm": 1.6669074296951294, "learning_rate": 3.145384966295195e-07, "loss": 0.7683, "step": 25194 }, { "epoch": 0.8899328444207003, "grad_norm": 1.7147765159606934, "learning_rate": 3.1433884912130454e-07, "loss": 0.7727, "step": 25195 }, { "epoch": 0.8899681662244082, "grad_norm": 1.7865312099456787, "learning_rate": 3.1413926293814066e-07, "loss": 0.7485, "step": 25196 }, { "epoch": 0.8900034880281161, "grad_norm": 2.024658679962158, "learning_rate": 3.139397380826409e-07, "loss": 0.7391, "step": 25197 }, { "epoch": 0.890038809831824, "grad_norm": 1.799634575843811, "learning_rate": 3.137402745574181e-07, "loss": 0.7539, "step": 25198 }, { "epoch": 0.890074131635532, "grad_norm": 1.9594744443893433, "learning_rate": 3.1354087236508123e-07, "loss": 0.76, "step": 25199 }, { "epoch": 0.8901094534392399, "grad_norm": 1.7747294902801514, "learning_rate": 3.1334153150824107e-07, "loss": 0.7159, "step": 25200 }, { "epoch": 0.8901447752429478, "grad_norm": 1.6939213275909424, "learning_rate": 3.1314225198950497e-07, "loss": 0.768, "step": 25201 }, { "epoch": 0.8901800970466557, "grad_norm": 1.6203209161758423, "learning_rate": 3.129430338114825e-07, "loss": 0.7544, "step": 25202 }, { "epoch": 0.8902154188503636, "grad_norm": 1.6688659191131592, "learning_rate": 3.1274387697678046e-07, "loss": 0.7492, "step": 25203 }, { "epoch": 0.8902507406540715, "grad_norm": 1.7029988765716553, "learning_rate": 3.1254478148800506e-07, "loss": 0.7768, "step": 25204 }, { "epoch": 0.8902860624577794, "grad_norm": 1.8878099918365479, "learning_rate": 3.1234574734776313e-07, "loss": 0.7574, "step": 25205 }, { "epoch": 0.8903213842614873, "grad_norm": 1.6313177347183228, "learning_rate": 3.1214677455865814e-07, "loss": 0.7451, "step": 25206 }, { "epoch": 0.8903567060651952, "grad_norm": 1.7136783599853516, "learning_rate": 3.119478631232953e-07, "loss": 0.8133, "step": 25207 }, { "epoch": 0.8903920278689031, "grad_norm": 1.6293623447418213, "learning_rate": 3.1174901304427795e-07, "loss": 0.7487, "step": 25208 }, { "epoch": 0.8904273496726111, "grad_norm": 1.8811143636703491, "learning_rate": 3.1155022432420744e-07, "loss": 0.7468, "step": 25209 }, { "epoch": 0.890462671476319, "grad_norm": 1.6067508459091187, "learning_rate": 3.1135149696568666e-07, "loss": 0.7513, "step": 25210 }, { "epoch": 0.8904979932800269, "grad_norm": 1.9041318893432617, "learning_rate": 3.111528309713158e-07, "loss": 0.7573, "step": 25211 }, { "epoch": 0.8905333150837348, "grad_norm": 1.9437066316604614, "learning_rate": 3.10954226343696e-07, "loss": 0.752, "step": 25212 }, { "epoch": 0.8905686368874427, "grad_norm": 1.8596664667129517, "learning_rate": 3.1075568308542525e-07, "loss": 0.7921, "step": 25213 }, { "epoch": 0.8906039586911506, "grad_norm": 1.8034332990646362, "learning_rate": 3.105572011991026e-07, "loss": 0.7837, "step": 25214 }, { "epoch": 0.8906392804948585, "grad_norm": 1.883270263671875, "learning_rate": 3.103587806873265e-07, "loss": 0.7573, "step": 25215 }, { "epoch": 0.8906746022985664, "grad_norm": 1.6324524879455566, "learning_rate": 3.1016042155269266e-07, "loss": 0.7767, "step": 25216 }, { "epoch": 0.8907099241022742, "grad_norm": 1.8460662364959717, "learning_rate": 3.0996212379779733e-07, "loss": 0.779, "step": 25217 }, { "epoch": 0.8907452459059821, "grad_norm": 2.984861373901367, "learning_rate": 3.0976388742523733e-07, "loss": 0.7546, "step": 25218 }, { "epoch": 0.89078056770969, "grad_norm": 1.5912909507751465, "learning_rate": 3.09565712437605e-07, "loss": 0.7446, "step": 25219 }, { "epoch": 0.890815889513398, "grad_norm": 1.7590886354446411, "learning_rate": 3.0936759883749445e-07, "loss": 0.7313, "step": 25220 }, { "epoch": 0.8908512113171059, "grad_norm": 1.6676826477050781, "learning_rate": 3.091695466274991e-07, "loss": 0.761, "step": 25221 }, { "epoch": 0.8908865331208138, "grad_norm": 1.7367795705795288, "learning_rate": 3.0897155581021187e-07, "loss": 0.7616, "step": 25222 }, { "epoch": 0.8909218549245217, "grad_norm": 1.7004039287567139, "learning_rate": 3.087736263882224e-07, "loss": 0.7483, "step": 25223 }, { "epoch": 0.8909571767282296, "grad_norm": 2.0121474266052246, "learning_rate": 3.085757583641219e-07, "loss": 0.8156, "step": 25224 }, { "epoch": 0.8909924985319375, "grad_norm": 1.7337887287139893, "learning_rate": 3.0837795174050055e-07, "loss": 0.7564, "step": 25225 }, { "epoch": 0.8910278203356454, "grad_norm": 1.5761579275131226, "learning_rate": 3.081802065199457e-07, "loss": 0.7376, "step": 25226 }, { "epoch": 0.8910631421393533, "grad_norm": 1.6557763814926147, "learning_rate": 3.079825227050465e-07, "loss": 0.7504, "step": 25227 }, { "epoch": 0.8910984639430612, "grad_norm": 1.8257372379302979, "learning_rate": 3.077849002983907e-07, "loss": 0.725, "step": 25228 }, { "epoch": 0.8911337857467692, "grad_norm": 1.5636781454086304, "learning_rate": 3.0758733930256356e-07, "loss": 0.7251, "step": 25229 }, { "epoch": 0.8911691075504771, "grad_norm": 1.7615324258804321, "learning_rate": 3.0738983972015133e-07, "loss": 0.7967, "step": 25230 }, { "epoch": 0.891204429354185, "grad_norm": 1.7164621353149414, "learning_rate": 3.071924015537392e-07, "loss": 0.7388, "step": 25231 }, { "epoch": 0.8912397511578929, "grad_norm": 1.69948410987854, "learning_rate": 3.0699502480591e-07, "loss": 0.773, "step": 25232 }, { "epoch": 0.8912750729616008, "grad_norm": 1.5356056690216064, "learning_rate": 3.0679770947924793e-07, "loss": 0.757, "step": 25233 }, { "epoch": 0.8913103947653087, "grad_norm": 1.627829909324646, "learning_rate": 3.066004555763358e-07, "loss": 0.7734, "step": 25234 }, { "epoch": 0.8913457165690166, "grad_norm": 1.9380282163619995, "learning_rate": 3.064032630997543e-07, "loss": 0.7776, "step": 25235 }, { "epoch": 0.8913810383727245, "grad_norm": 1.708222508430481, "learning_rate": 3.0620613205208425e-07, "loss": 0.786, "step": 25236 }, { "epoch": 0.8914163601764324, "grad_norm": 1.5428828001022339, "learning_rate": 3.060090624359069e-07, "loss": 0.7465, "step": 25237 }, { "epoch": 0.8914516819801404, "grad_norm": 1.9836207628250122, "learning_rate": 3.0581205425380013e-07, "loss": 0.745, "step": 25238 }, { "epoch": 0.8914870037838483, "grad_norm": 1.9261786937713623, "learning_rate": 3.056151075083424e-07, "loss": 0.7734, "step": 25239 }, { "epoch": 0.8915223255875562, "grad_norm": 1.5767993927001953, "learning_rate": 3.054182222021118e-07, "loss": 0.749, "step": 25240 }, { "epoch": 0.8915576473912641, "grad_norm": 1.7486000061035156, "learning_rate": 3.052213983376856e-07, "loss": 0.7557, "step": 25241 }, { "epoch": 0.891592969194972, "grad_norm": 2.083920955657959, "learning_rate": 3.0502463591763843e-07, "loss": 0.7327, "step": 25242 }, { "epoch": 0.8916282909986798, "grad_norm": 1.8231343030929565, "learning_rate": 3.0482793494454654e-07, "loss": 0.7584, "step": 25243 }, { "epoch": 0.8916636128023877, "grad_norm": 1.8375252485275269, "learning_rate": 3.0463129542098457e-07, "loss": 0.8026, "step": 25244 }, { "epoch": 0.8916989346060956, "grad_norm": 1.7547374963760376, "learning_rate": 3.044347173495249e-07, "loss": 0.794, "step": 25245 }, { "epoch": 0.8917342564098035, "grad_norm": 1.9247090816497803, "learning_rate": 3.04238200732741e-07, "loss": 0.7444, "step": 25246 }, { "epoch": 0.8917695782135114, "grad_norm": 1.644339919090271, "learning_rate": 3.040417455732053e-07, "loss": 0.7471, "step": 25247 }, { "epoch": 0.8918049000172193, "grad_norm": 0.915313720703125, "learning_rate": 3.0384535187348794e-07, "loss": 0.5662, "step": 25248 }, { "epoch": 0.8918402218209273, "grad_norm": 1.8221567869186401, "learning_rate": 3.036490196361597e-07, "loss": 0.7591, "step": 25249 }, { "epoch": 0.8918755436246352, "grad_norm": 1.6550825834274292, "learning_rate": 3.0345274886379117e-07, "loss": 0.7882, "step": 25250 }, { "epoch": 0.8919108654283431, "grad_norm": 1.7828654050827026, "learning_rate": 3.032565395589493e-07, "loss": 0.7496, "step": 25251 }, { "epoch": 0.891946187232051, "grad_norm": 1.8691039085388184, "learning_rate": 3.030603917242031e-07, "loss": 0.8006, "step": 25252 }, { "epoch": 0.8919815090357589, "grad_norm": 1.7996245622634888, "learning_rate": 3.028643053621205e-07, "loss": 0.7537, "step": 25253 }, { "epoch": 0.8920168308394668, "grad_norm": 1.723630666732788, "learning_rate": 3.026682804752662e-07, "loss": 0.7536, "step": 25254 }, { "epoch": 0.8920521526431747, "grad_norm": 1.6044772863388062, "learning_rate": 3.024723170662069e-07, "loss": 0.7417, "step": 25255 }, { "epoch": 0.8920874744468826, "grad_norm": 1.661411166191101, "learning_rate": 3.0227641513750627e-07, "loss": 0.7797, "step": 25256 }, { "epoch": 0.8921227962505905, "grad_norm": 2.1111409664154053, "learning_rate": 3.0208057469172933e-07, "loss": 0.7319, "step": 25257 }, { "epoch": 0.8921581180542985, "grad_norm": 1.8540246486663818, "learning_rate": 3.01884795731438e-07, "loss": 0.745, "step": 25258 }, { "epoch": 0.8921934398580064, "grad_norm": 1.699723482131958, "learning_rate": 3.0168907825919525e-07, "loss": 0.7693, "step": 25259 }, { "epoch": 0.8922287616617143, "grad_norm": 1.6705191135406494, "learning_rate": 3.0149342227756397e-07, "loss": 0.7226, "step": 25260 }, { "epoch": 0.8922640834654222, "grad_norm": 2.5925216674804688, "learning_rate": 3.012978277891021e-07, "loss": 0.7884, "step": 25261 }, { "epoch": 0.8922994052691301, "grad_norm": 1.799813151359558, "learning_rate": 3.0110229479637156e-07, "loss": 0.7651, "step": 25262 }, { "epoch": 0.892334727072838, "grad_norm": 1.650699257850647, "learning_rate": 3.0090682330193133e-07, "loss": 0.765, "step": 25263 }, { "epoch": 0.8923700488765459, "grad_norm": 1.5375596284866333, "learning_rate": 3.007114133083389e-07, "loss": 0.7323, "step": 25264 }, { "epoch": 0.8924053706802538, "grad_norm": 1.7086659669876099, "learning_rate": 3.0051606481815157e-07, "loss": 0.7847, "step": 25265 }, { "epoch": 0.8924406924839617, "grad_norm": 2.3199925422668457, "learning_rate": 3.0032077783392786e-07, "loss": 0.7369, "step": 25266 }, { "epoch": 0.8924760142876697, "grad_norm": 1.6586326360702515, "learning_rate": 3.001255523582214e-07, "loss": 0.7438, "step": 25267 }, { "epoch": 0.8925113360913776, "grad_norm": 2.068918228149414, "learning_rate": 2.9993038839358777e-07, "loss": 0.7539, "step": 25268 }, { "epoch": 0.8925466578950854, "grad_norm": 3.5251662731170654, "learning_rate": 2.997352859425823e-07, "loss": 0.74, "step": 25269 }, { "epoch": 0.8925819796987933, "grad_norm": 1.8059146404266357, "learning_rate": 2.995402450077584e-07, "loss": 0.7636, "step": 25270 }, { "epoch": 0.8926173015025012, "grad_norm": 1.588613748550415, "learning_rate": 2.993452655916673e-07, "loss": 0.7214, "step": 25271 }, { "epoch": 0.8926526233062091, "grad_norm": 3.0775647163391113, "learning_rate": 2.991503476968621e-07, "loss": 0.7408, "step": 25272 }, { "epoch": 0.892687945109917, "grad_norm": 1.7112478017807007, "learning_rate": 2.989554913258941e-07, "loss": 0.7906, "step": 25273 }, { "epoch": 0.8927232669136249, "grad_norm": 1.5519261360168457, "learning_rate": 2.987606964813128e-07, "loss": 0.7759, "step": 25274 }, { "epoch": 0.8927585887173328, "grad_norm": 1.6383951902389526, "learning_rate": 2.9856596316566675e-07, "loss": 0.78, "step": 25275 }, { "epoch": 0.8927939105210407, "grad_norm": 2.348133087158203, "learning_rate": 2.983712913815068e-07, "loss": 0.7977, "step": 25276 }, { "epoch": 0.8928292323247486, "grad_norm": 1.7351950407028198, "learning_rate": 2.981766811313791e-07, "loss": 0.7854, "step": 25277 }, { "epoch": 0.8928645541284566, "grad_norm": 1.6787307262420654, "learning_rate": 2.979821324178306e-07, "loss": 0.797, "step": 25278 }, { "epoch": 0.8928998759321645, "grad_norm": 1.7020394802093506, "learning_rate": 2.977876452434081e-07, "loss": 0.7651, "step": 25279 }, { "epoch": 0.8929351977358724, "grad_norm": 4.358826637268066, "learning_rate": 2.975932196106579e-07, "loss": 0.7509, "step": 25280 }, { "epoch": 0.8929705195395803, "grad_norm": 1.7944642305374146, "learning_rate": 2.97398855522123e-07, "loss": 0.7654, "step": 25281 }, { "epoch": 0.8930058413432882, "grad_norm": 1.799708604812622, "learning_rate": 2.9720455298034745e-07, "loss": 0.775, "step": 25282 }, { "epoch": 0.8930411631469961, "grad_norm": 1.56451416015625, "learning_rate": 2.970103119878759e-07, "loss": 0.7109, "step": 25283 }, { "epoch": 0.893076484950704, "grad_norm": 1.5397554636001587, "learning_rate": 2.968161325472485e-07, "loss": 0.7549, "step": 25284 }, { "epoch": 0.8931118067544119, "grad_norm": 1.6660529375076294, "learning_rate": 2.9662201466100717e-07, "loss": 0.7758, "step": 25285 }, { "epoch": 0.8931471285581198, "grad_norm": 1.5425608158111572, "learning_rate": 2.964279583316937e-07, "loss": 0.7593, "step": 25286 }, { "epoch": 0.8931824503618278, "grad_norm": 1.8391075134277344, "learning_rate": 2.962339635618466e-07, "loss": 0.7798, "step": 25287 }, { "epoch": 0.8932177721655357, "grad_norm": 1.632155179977417, "learning_rate": 2.960400303540045e-07, "loss": 0.7486, "step": 25288 }, { "epoch": 0.8932530939692436, "grad_norm": 1.8313353061676025, "learning_rate": 2.958461587107075e-07, "loss": 0.7788, "step": 25289 }, { "epoch": 0.8932884157729515, "grad_norm": 1.8436030149459839, "learning_rate": 2.9565234863449086e-07, "loss": 0.7538, "step": 25290 }, { "epoch": 0.8933237375766594, "grad_norm": 1.6370688676834106, "learning_rate": 2.954586001278914e-07, "loss": 0.7399, "step": 25291 }, { "epoch": 0.8933590593803673, "grad_norm": 1.6063629388809204, "learning_rate": 2.9526491319344707e-07, "loss": 0.7798, "step": 25292 }, { "epoch": 0.8933943811840752, "grad_norm": 1.7846180200576782, "learning_rate": 2.9507128783369034e-07, "loss": 0.7771, "step": 25293 }, { "epoch": 0.8934297029877831, "grad_norm": 1.8358110189437866, "learning_rate": 2.9487772405115577e-07, "loss": 0.8051, "step": 25294 }, { "epoch": 0.8934650247914909, "grad_norm": 1.6559516191482544, "learning_rate": 2.94684221848377e-07, "loss": 0.7275, "step": 25295 }, { "epoch": 0.8935003465951988, "grad_norm": 1.8634490966796875, "learning_rate": 2.9449078122788745e-07, "loss": 0.7835, "step": 25296 }, { "epoch": 0.8935356683989067, "grad_norm": 1.6595861911773682, "learning_rate": 2.942974021922174e-07, "loss": 0.7656, "step": 25297 }, { "epoch": 0.8935709902026147, "grad_norm": 1.7969698905944824, "learning_rate": 2.9410408474389807e-07, "loss": 0.7483, "step": 25298 }, { "epoch": 0.8936063120063226, "grad_norm": 2.0564942359924316, "learning_rate": 2.939108288854609e-07, "loss": 0.7427, "step": 25299 }, { "epoch": 0.8936416338100305, "grad_norm": 3.1967711448669434, "learning_rate": 2.9371763461943314e-07, "loss": 0.7489, "step": 25300 }, { "epoch": 0.8936769556137384, "grad_norm": 1.6645663976669312, "learning_rate": 2.9352450194834457e-07, "loss": 0.7723, "step": 25301 }, { "epoch": 0.8937122774174463, "grad_norm": 1.74080228805542, "learning_rate": 2.9333143087472314e-07, "loss": 0.7884, "step": 25302 }, { "epoch": 0.8937475992211542, "grad_norm": 1.8338544368743896, "learning_rate": 2.931384214010946e-07, "loss": 0.7167, "step": 25303 }, { "epoch": 0.8937829210248621, "grad_norm": 1.7597283124923706, "learning_rate": 2.929454735299858e-07, "loss": 0.7694, "step": 25304 }, { "epoch": 0.89381824282857, "grad_norm": 1.6678614616394043, "learning_rate": 2.9275258726392195e-07, "loss": 0.7573, "step": 25305 }, { "epoch": 0.8938535646322779, "grad_norm": 1.9483846426010132, "learning_rate": 2.925597626054272e-07, "loss": 0.7831, "step": 25306 }, { "epoch": 0.8938888864359859, "grad_norm": 1.610872745513916, "learning_rate": 2.9236699955702494e-07, "loss": 0.7454, "step": 25307 }, { "epoch": 0.8939242082396938, "grad_norm": 1.9458175897598267, "learning_rate": 2.9217429812123945e-07, "loss": 0.7221, "step": 25308 }, { "epoch": 0.8939595300434017, "grad_norm": 1.94264817237854, "learning_rate": 2.919816583005913e-07, "loss": 0.7457, "step": 25309 }, { "epoch": 0.8939948518471096, "grad_norm": 1.7398406267166138, "learning_rate": 2.91789080097602e-07, "loss": 0.7631, "step": 25310 }, { "epoch": 0.8940301736508175, "grad_norm": 1.7594491243362427, "learning_rate": 2.915965635147927e-07, "loss": 0.7819, "step": 25311 }, { "epoch": 0.8940654954545254, "grad_norm": 1.7073417901992798, "learning_rate": 2.914041085546826e-07, "loss": 0.7431, "step": 25312 }, { "epoch": 0.8941008172582333, "grad_norm": 1.7033791542053223, "learning_rate": 2.9121171521978964e-07, "loss": 0.7563, "step": 25313 }, { "epoch": 0.8941361390619412, "grad_norm": 1.7532751560211182, "learning_rate": 2.910193835126324e-07, "loss": 0.7615, "step": 25314 }, { "epoch": 0.8941714608656491, "grad_norm": 1.6125167608261108, "learning_rate": 2.9082711343572934e-07, "loss": 0.7504, "step": 25315 }, { "epoch": 0.894206782669357, "grad_norm": 1.7200558185577393, "learning_rate": 2.9063490499159463e-07, "loss": 0.8246, "step": 25316 }, { "epoch": 0.894242104473065, "grad_norm": 2.0047338008880615, "learning_rate": 2.904427581827457e-07, "loss": 0.7612, "step": 25317 }, { "epoch": 0.8942774262767729, "grad_norm": 1.5717941522598267, "learning_rate": 2.902506730116966e-07, "loss": 0.7554, "step": 25318 }, { "epoch": 0.8943127480804808, "grad_norm": 2.293426513671875, "learning_rate": 2.900586494809615e-07, "loss": 0.7918, "step": 25319 }, { "epoch": 0.8943480698841887, "grad_norm": 1.5488990545272827, "learning_rate": 2.898666875930528e-07, "loss": 0.7704, "step": 25320 }, { "epoch": 0.8943833916878965, "grad_norm": 1.6498098373413086, "learning_rate": 2.896747873504846e-07, "loss": 0.7567, "step": 25321 }, { "epoch": 0.8944187134916044, "grad_norm": 1.6375595331192017, "learning_rate": 2.89482948755766e-07, "loss": 0.75, "step": 25322 }, { "epoch": 0.8944540352953123, "grad_norm": 1.9704225063323975, "learning_rate": 2.892911718114094e-07, "loss": 0.7533, "step": 25323 }, { "epoch": 0.8944893570990202, "grad_norm": 1.595207929611206, "learning_rate": 2.8909945651992566e-07, "loss": 0.7736, "step": 25324 }, { "epoch": 0.8945246789027281, "grad_norm": 1.7036844491958618, "learning_rate": 2.8890780288382105e-07, "loss": 0.7672, "step": 25325 }, { "epoch": 0.894560000706436, "grad_norm": 1.9641200304031372, "learning_rate": 2.887162109056063e-07, "loss": 0.7283, "step": 25326 }, { "epoch": 0.894595322510144, "grad_norm": 1.9778857231140137, "learning_rate": 2.885246805877878e-07, "loss": 0.757, "step": 25327 }, { "epoch": 0.8946306443138519, "grad_norm": 1.7462612390518188, "learning_rate": 2.8833321193287353e-07, "loss": 0.744, "step": 25328 }, { "epoch": 0.8946659661175598, "grad_norm": 1.6983898878097534, "learning_rate": 2.881418049433682e-07, "loss": 0.7642, "step": 25329 }, { "epoch": 0.8947012879212677, "grad_norm": 1.7717907428741455, "learning_rate": 2.8795045962177635e-07, "loss": 0.752, "step": 25330 }, { "epoch": 0.8947366097249756, "grad_norm": 1.708707571029663, "learning_rate": 2.877591759706044e-07, "loss": 0.7268, "step": 25331 }, { "epoch": 0.8947719315286835, "grad_norm": 1.6545295715332031, "learning_rate": 2.875679539923537e-07, "loss": 0.7511, "step": 25332 }, { "epoch": 0.8948072533323914, "grad_norm": 1.6125373840332031, "learning_rate": 2.873767936895272e-07, "loss": 0.73, "step": 25333 }, { "epoch": 0.8948425751360993, "grad_norm": 1.6018754243850708, "learning_rate": 2.871856950646285e-07, "loss": 0.7281, "step": 25334 }, { "epoch": 0.8948778969398072, "grad_norm": 1.7828587293624878, "learning_rate": 2.8699465812015714e-07, "loss": 0.7231, "step": 25335 }, { "epoch": 0.8949132187435151, "grad_norm": 1.673274278640747, "learning_rate": 2.868036828586135e-07, "loss": 0.7595, "step": 25336 }, { "epoch": 0.8949485405472231, "grad_norm": 1.743343472480774, "learning_rate": 2.8661276928249717e-07, "loss": 0.8029, "step": 25337 }, { "epoch": 0.894983862350931, "grad_norm": 1.683768391609192, "learning_rate": 2.8642191739430724e-07, "loss": 0.7699, "step": 25338 }, { "epoch": 0.8950191841546389, "grad_norm": 1.796020269393921, "learning_rate": 2.8623112719654066e-07, "loss": 0.7617, "step": 25339 }, { "epoch": 0.8950545059583468, "grad_norm": 1.5203466415405273, "learning_rate": 2.860403986916954e-07, "loss": 0.7109, "step": 25340 }, { "epoch": 0.8950898277620547, "grad_norm": 1.7218579053878784, "learning_rate": 2.8584973188226784e-07, "loss": 0.7865, "step": 25341 }, { "epoch": 0.8951251495657626, "grad_norm": 1.6513826847076416, "learning_rate": 2.85659126770752e-07, "loss": 0.7833, "step": 25342 }, { "epoch": 0.8951604713694705, "grad_norm": 1.7884691953659058, "learning_rate": 2.854685833596432e-07, "loss": 0.7604, "step": 25343 }, { "epoch": 0.8951957931731784, "grad_norm": 1.5531326532363892, "learning_rate": 2.852781016514361e-07, "loss": 0.7284, "step": 25344 }, { "epoch": 0.8952311149768863, "grad_norm": 2.2627618312835693, "learning_rate": 2.8508768164862255e-07, "loss": 0.7621, "step": 25345 }, { "epoch": 0.8952664367805943, "grad_norm": 1.6409392356872559, "learning_rate": 2.84897323353695e-07, "loss": 0.7354, "step": 25346 }, { "epoch": 0.895301758584302, "grad_norm": 1.7136684656143188, "learning_rate": 2.8470702676914543e-07, "loss": 0.774, "step": 25347 }, { "epoch": 0.89533708038801, "grad_norm": 1.581257700920105, "learning_rate": 2.8451679189746393e-07, "loss": 0.7672, "step": 25348 }, { "epoch": 0.8953724021917179, "grad_norm": 1.775238037109375, "learning_rate": 2.8432661874113977e-07, "loss": 0.7302, "step": 25349 }, { "epoch": 0.8954077239954258, "grad_norm": 1.8151909112930298, "learning_rate": 2.8413650730266254e-07, "loss": 0.7389, "step": 25350 }, { "epoch": 0.8954430457991337, "grad_norm": 1.4786686897277832, "learning_rate": 2.8394645758452024e-07, "loss": 0.7453, "step": 25351 }, { "epoch": 0.8954783676028416, "grad_norm": 1.6705626249313354, "learning_rate": 2.8375646958919977e-07, "loss": 0.7406, "step": 25352 }, { "epoch": 0.8955136894065495, "grad_norm": 0.9058667421340942, "learning_rate": 2.8356654331918864e-07, "loss": 0.5642, "step": 25353 }, { "epoch": 0.8955490112102574, "grad_norm": 1.6585414409637451, "learning_rate": 2.8337667877697205e-07, "loss": 0.754, "step": 25354 }, { "epoch": 0.8955843330139653, "grad_norm": 1.586501955986023, "learning_rate": 2.8318687596503414e-07, "loss": 0.7203, "step": 25355 }, { "epoch": 0.8956196548176732, "grad_norm": 1.7074474096298218, "learning_rate": 2.829971348858601e-07, "loss": 0.7772, "step": 25356 }, { "epoch": 0.8956549766213812, "grad_norm": 1.647376298904419, "learning_rate": 2.8280745554193354e-07, "loss": 0.7277, "step": 25357 }, { "epoch": 0.8956902984250891, "grad_norm": 1.7813011407852173, "learning_rate": 2.8261783793573526e-07, "loss": 0.7384, "step": 25358 }, { "epoch": 0.895725620228797, "grad_norm": 1.7640942335128784, "learning_rate": 2.824282820697483e-07, "loss": 0.7935, "step": 25359 }, { "epoch": 0.8957609420325049, "grad_norm": 2.407277822494507, "learning_rate": 2.822387879464533e-07, "loss": 0.7566, "step": 25360 }, { "epoch": 0.8957962638362128, "grad_norm": 1.5855505466461182, "learning_rate": 2.8204935556833015e-07, "loss": 0.7521, "step": 25361 }, { "epoch": 0.8958315856399207, "grad_norm": 1.664468765258789, "learning_rate": 2.818599849378578e-07, "loss": 0.7258, "step": 25362 }, { "epoch": 0.8958669074436286, "grad_norm": 1.6946152448654175, "learning_rate": 2.816706760575155e-07, "loss": 0.7609, "step": 25363 }, { "epoch": 0.8959022292473365, "grad_norm": 1.657314658164978, "learning_rate": 2.814814289297801e-07, "loss": 0.7738, "step": 25364 }, { "epoch": 0.8959375510510444, "grad_norm": 2.07283616065979, "learning_rate": 2.8129224355712913e-07, "loss": 0.7611, "step": 25365 }, { "epoch": 0.8959728728547524, "grad_norm": 1.807140588760376, "learning_rate": 2.811031199420383e-07, "loss": 0.7643, "step": 25366 }, { "epoch": 0.8960081946584603, "grad_norm": 1.9397794008255005, "learning_rate": 2.8091405808698345e-07, "loss": 0.7502, "step": 25367 }, { "epoch": 0.8960435164621682, "grad_norm": 1.7556016445159912, "learning_rate": 2.807250579944371e-07, "loss": 0.7538, "step": 25368 }, { "epoch": 0.8960788382658761, "grad_norm": 1.8013681173324585, "learning_rate": 2.8053611966687433e-07, "loss": 0.7574, "step": 25369 }, { "epoch": 0.896114160069584, "grad_norm": 1.556564450263977, "learning_rate": 2.8034724310676776e-07, "loss": 0.7493, "step": 25370 }, { "epoch": 0.8961494818732919, "grad_norm": 1.8443551063537598, "learning_rate": 2.8015842831658926e-07, "loss": 0.7421, "step": 25371 }, { "epoch": 0.8961848036769998, "grad_norm": 1.6412091255187988, "learning_rate": 2.799696752988096e-07, "loss": 0.7539, "step": 25372 }, { "epoch": 0.8962201254807076, "grad_norm": 1.6826194524765015, "learning_rate": 2.797809840559007e-07, "loss": 0.7776, "step": 25373 }, { "epoch": 0.8962554472844155, "grad_norm": 1.554378867149353, "learning_rate": 2.795923545903301e-07, "loss": 0.7094, "step": 25374 }, { "epoch": 0.8962907690881234, "grad_norm": 1.6469422578811646, "learning_rate": 2.794037869045674e-07, "loss": 0.754, "step": 25375 }, { "epoch": 0.8963260908918314, "grad_norm": 1.7743325233459473, "learning_rate": 2.792152810010812e-07, "loss": 0.7811, "step": 25376 }, { "epoch": 0.8963614126955393, "grad_norm": 1.6854398250579834, "learning_rate": 2.790268368823373e-07, "loss": 0.7695, "step": 25377 }, { "epoch": 0.8963967344992472, "grad_norm": 1.801877737045288, "learning_rate": 2.7883845455080274e-07, "loss": 0.7972, "step": 25378 }, { "epoch": 0.8964320563029551, "grad_norm": 1.567350149154663, "learning_rate": 2.786501340089437e-07, "loss": 0.7475, "step": 25379 }, { "epoch": 0.896467378106663, "grad_norm": 1.7434074878692627, "learning_rate": 2.784618752592233e-07, "loss": 0.7475, "step": 25380 }, { "epoch": 0.8965026999103709, "grad_norm": 1.9124003648757935, "learning_rate": 2.7827367830410676e-07, "loss": 0.7865, "step": 25381 }, { "epoch": 0.8965380217140788, "grad_norm": 1.8027563095092773, "learning_rate": 2.780855431460572e-07, "loss": 0.7633, "step": 25382 }, { "epoch": 0.8965733435177867, "grad_norm": 2.045441150665283, "learning_rate": 2.778974697875353e-07, "loss": 0.7788, "step": 25383 }, { "epoch": 0.8966086653214946, "grad_norm": 1.6535300016403198, "learning_rate": 2.777094582310047e-07, "loss": 0.7739, "step": 25384 }, { "epoch": 0.8966439871252025, "grad_norm": 1.8430790901184082, "learning_rate": 2.775215084789246e-07, "loss": 0.7614, "step": 25385 }, { "epoch": 0.8966793089289105, "grad_norm": 1.7258963584899902, "learning_rate": 2.7733362053375565e-07, "loss": 0.7833, "step": 25386 }, { "epoch": 0.8967146307326184, "grad_norm": 1.789363980293274, "learning_rate": 2.771457943979561e-07, "loss": 0.7492, "step": 25387 }, { "epoch": 0.8967499525363263, "grad_norm": 1.810792326927185, "learning_rate": 2.7695803007398437e-07, "loss": 0.763, "step": 25388 }, { "epoch": 0.8967852743400342, "grad_norm": 1.7591205835342407, "learning_rate": 2.7677032756429857e-07, "loss": 0.7387, "step": 25389 }, { "epoch": 0.8968205961437421, "grad_norm": 1.875793218612671, "learning_rate": 2.765826868713545e-07, "loss": 0.756, "step": 25390 }, { "epoch": 0.89685591794745, "grad_norm": 1.7134478092193604, "learning_rate": 2.763951079976085e-07, "loss": 0.757, "step": 25391 }, { "epoch": 0.8968912397511579, "grad_norm": 1.565818428993225, "learning_rate": 2.762075909455159e-07, "loss": 0.7558, "step": 25392 }, { "epoch": 0.8969265615548658, "grad_norm": 1.6861069202423096, "learning_rate": 2.760201357175296e-07, "loss": 0.7686, "step": 25393 }, { "epoch": 0.8969618833585737, "grad_norm": 1.7122124433517456, "learning_rate": 2.758327423161034e-07, "loss": 0.7824, "step": 25394 }, { "epoch": 0.8969972051622817, "grad_norm": 1.8228942155838013, "learning_rate": 2.756454107436907e-07, "loss": 0.8073, "step": 25395 }, { "epoch": 0.8970325269659896, "grad_norm": 1.9870203733444214, "learning_rate": 2.7545814100274347e-07, "loss": 0.7432, "step": 25396 }, { "epoch": 0.8970678487696975, "grad_norm": 1.8486889600753784, "learning_rate": 2.7527093309571096e-07, "loss": 0.7598, "step": 25397 }, { "epoch": 0.8971031705734054, "grad_norm": 1.6702141761779785, "learning_rate": 2.7508378702504446e-07, "loss": 0.7892, "step": 25398 }, { "epoch": 0.8971384923771132, "grad_norm": 1.9765208959579468, "learning_rate": 2.7489670279319427e-07, "loss": 0.7562, "step": 25399 }, { "epoch": 0.8971738141808211, "grad_norm": 1.8332403898239136, "learning_rate": 2.747096804026067e-07, "loss": 0.797, "step": 25400 }, { "epoch": 0.897209135984529, "grad_norm": 1.5921186208724976, "learning_rate": 2.745227198557304e-07, "loss": 0.7411, "step": 25401 }, { "epoch": 0.8972444577882369, "grad_norm": 1.8261476755142212, "learning_rate": 2.743358211550134e-07, "loss": 0.771, "step": 25402 }, { "epoch": 0.8972797795919448, "grad_norm": 1.7081153392791748, "learning_rate": 2.7414898430290107e-07, "loss": 0.7206, "step": 25403 }, { "epoch": 0.8973151013956527, "grad_norm": 1.7897790670394897, "learning_rate": 2.7396220930183735e-07, "loss": 0.8215, "step": 25404 }, { "epoch": 0.8973504231993606, "grad_norm": 1.758907437324524, "learning_rate": 2.7377549615426823e-07, "loss": 0.7762, "step": 25405 }, { "epoch": 0.8973857450030686, "grad_norm": 1.9206701517105103, "learning_rate": 2.7358884486263725e-07, "loss": 0.7618, "step": 25406 }, { "epoch": 0.8974210668067765, "grad_norm": 1.6263363361358643, "learning_rate": 2.734022554293869e-07, "loss": 0.748, "step": 25407 }, { "epoch": 0.8974563886104844, "grad_norm": 2.564589023590088, "learning_rate": 2.732157278569586e-07, "loss": 0.7842, "step": 25408 }, { "epoch": 0.8974917104141923, "grad_norm": 1.8569839000701904, "learning_rate": 2.7302926214779536e-07, "loss": 0.783, "step": 25409 }, { "epoch": 0.8975270322179002, "grad_norm": 1.537567377090454, "learning_rate": 2.7284285830433634e-07, "loss": 0.7574, "step": 25410 }, { "epoch": 0.8975623540216081, "grad_norm": 3.6595733165740967, "learning_rate": 2.726565163290207e-07, "loss": 0.7623, "step": 25411 }, { "epoch": 0.897597675825316, "grad_norm": 1.7138339281082153, "learning_rate": 2.7247023622428867e-07, "loss": 0.8045, "step": 25412 }, { "epoch": 0.8976329976290239, "grad_norm": 1.656697392463684, "learning_rate": 2.722840179925773e-07, "loss": 0.7714, "step": 25413 }, { "epoch": 0.8976683194327318, "grad_norm": 1.645113229751587, "learning_rate": 2.720978616363235e-07, "loss": 0.7711, "step": 25414 }, { "epoch": 0.8977036412364398, "grad_norm": 1.5730136632919312, "learning_rate": 2.719117671579652e-07, "loss": 0.719, "step": 25415 }, { "epoch": 0.8977389630401477, "grad_norm": 1.5821914672851562, "learning_rate": 2.717257345599361e-07, "loss": 0.7736, "step": 25416 }, { "epoch": 0.8977742848438556, "grad_norm": 1.727128267288208, "learning_rate": 2.715397638446715e-07, "loss": 0.7521, "step": 25417 }, { "epoch": 0.8978096066475635, "grad_norm": 1.6177430152893066, "learning_rate": 2.7135385501460667e-07, "loss": 0.722, "step": 25418 }, { "epoch": 0.8978449284512714, "grad_norm": 1.6115044355392456, "learning_rate": 2.711680080721729e-07, "loss": 0.7415, "step": 25419 }, { "epoch": 0.8978802502549793, "grad_norm": 0.9755045771598816, "learning_rate": 2.709822230198028e-07, "loss": 0.5855, "step": 25420 }, { "epoch": 0.8979155720586872, "grad_norm": 1.6526942253112793, "learning_rate": 2.7079649985992995e-07, "loss": 0.7693, "step": 25421 }, { "epoch": 0.8979508938623951, "grad_norm": 1.7515246868133545, "learning_rate": 2.706108385949829e-07, "loss": 0.8057, "step": 25422 }, { "epoch": 0.897986215666103, "grad_norm": 1.7099051475524902, "learning_rate": 2.7042523922739204e-07, "loss": 0.7819, "step": 25423 }, { "epoch": 0.898021537469811, "grad_norm": 1.970475435256958, "learning_rate": 2.702397017595859e-07, "loss": 0.7629, "step": 25424 }, { "epoch": 0.8980568592735187, "grad_norm": 1.606392502784729, "learning_rate": 2.700542261939948e-07, "loss": 0.7934, "step": 25425 }, { "epoch": 0.8980921810772267, "grad_norm": 1.7394521236419678, "learning_rate": 2.6986881253304344e-07, "loss": 0.7925, "step": 25426 }, { "epoch": 0.8981275028809346, "grad_norm": 1.6344876289367676, "learning_rate": 2.6968346077916043e-07, "loss": 0.7613, "step": 25427 }, { "epoch": 0.8981628246846425, "grad_norm": 1.7817444801330566, "learning_rate": 2.694981709347716e-07, "loss": 0.7666, "step": 25428 }, { "epoch": 0.8981981464883504, "grad_norm": 1.660543441772461, "learning_rate": 2.693129430023011e-07, "loss": 0.7453, "step": 25429 }, { "epoch": 0.8982334682920583, "grad_norm": 1.722611665725708, "learning_rate": 2.6912777698417325e-07, "loss": 0.7504, "step": 25430 }, { "epoch": 0.8982687900957662, "grad_norm": 1.6595977544784546, "learning_rate": 2.689426728828126e-07, "loss": 0.7401, "step": 25431 }, { "epoch": 0.8983041118994741, "grad_norm": 1.728074550628662, "learning_rate": 2.687576307006395e-07, "loss": 0.7635, "step": 25432 }, { "epoch": 0.898339433703182, "grad_norm": 1.6580907106399536, "learning_rate": 2.685726504400782e-07, "loss": 0.7344, "step": 25433 }, { "epoch": 0.8983747555068899, "grad_norm": 1.902811050415039, "learning_rate": 2.6838773210354884e-07, "loss": 0.7846, "step": 25434 }, { "epoch": 0.8984100773105979, "grad_norm": 1.832090139389038, "learning_rate": 2.682028756934707e-07, "loss": 0.7986, "step": 25435 }, { "epoch": 0.8984453991143058, "grad_norm": 1.607088565826416, "learning_rate": 2.6801808121226345e-07, "loss": 0.7134, "step": 25436 }, { "epoch": 0.8984807209180137, "grad_norm": 1.9485695362091064, "learning_rate": 2.678333486623474e-07, "loss": 0.7862, "step": 25437 }, { "epoch": 0.8985160427217216, "grad_norm": 1.7407721281051636, "learning_rate": 2.676486780461379e-07, "loss": 0.7689, "step": 25438 }, { "epoch": 0.8985513645254295, "grad_norm": 1.7086741924285889, "learning_rate": 2.6746406936605294e-07, "loss": 0.7629, "step": 25439 }, { "epoch": 0.8985866863291374, "grad_norm": 1.6652718782424927, "learning_rate": 2.672795226245095e-07, "loss": 0.7287, "step": 25440 }, { "epoch": 0.8986220081328453, "grad_norm": 1.7203384637832642, "learning_rate": 2.6709503782392176e-07, "loss": 0.7399, "step": 25441 }, { "epoch": 0.8986573299365532, "grad_norm": 1.9566289186477661, "learning_rate": 2.669106149667039e-07, "loss": 0.7714, "step": 25442 }, { "epoch": 0.8986926517402611, "grad_norm": 1.7692824602127075, "learning_rate": 2.667262540552701e-07, "loss": 0.7569, "step": 25443 }, { "epoch": 0.898727973543969, "grad_norm": 1.6358381509780884, "learning_rate": 2.665419550920345e-07, "loss": 0.6994, "step": 25444 }, { "epoch": 0.898763295347677, "grad_norm": 1.4958044290542603, "learning_rate": 2.663577180794069e-07, "loss": 0.753, "step": 25445 }, { "epoch": 0.8987986171513849, "grad_norm": 1.6160765886306763, "learning_rate": 2.661735430197998e-07, "loss": 0.7773, "step": 25446 }, { "epoch": 0.8988339389550928, "grad_norm": 1.967689871788025, "learning_rate": 2.6598942991562404e-07, "loss": 0.7631, "step": 25447 }, { "epoch": 0.8988692607588007, "grad_norm": 16.300308227539062, "learning_rate": 2.658053787692877e-07, "loss": 0.7614, "step": 25448 }, { "epoch": 0.8989045825625086, "grad_norm": 3.9024550914764404, "learning_rate": 2.6562138958320106e-07, "loss": 0.7537, "step": 25449 }, { "epoch": 0.8989399043662165, "grad_norm": 1.9256372451782227, "learning_rate": 2.654374623597722e-07, "loss": 0.7824, "step": 25450 }, { "epoch": 0.8989752261699243, "grad_norm": 1.6397583484649658, "learning_rate": 2.652535971014075e-07, "loss": 0.7574, "step": 25451 }, { "epoch": 0.8990105479736322, "grad_norm": 1.8461127281188965, "learning_rate": 2.6506979381051346e-07, "loss": 0.7534, "step": 25452 }, { "epoch": 0.8990458697773401, "grad_norm": 1.7596663236618042, "learning_rate": 2.6488605248949585e-07, "loss": 0.7898, "step": 25453 }, { "epoch": 0.899081191581048, "grad_norm": 1.6537126302719116, "learning_rate": 2.6470237314076053e-07, "loss": 0.7665, "step": 25454 }, { "epoch": 0.899116513384756, "grad_norm": 1.7999370098114014, "learning_rate": 2.6451875576670894e-07, "loss": 0.7797, "step": 25455 }, { "epoch": 0.8991518351884639, "grad_norm": 1.6725515127182007, "learning_rate": 2.6433520036974635e-07, "loss": 0.7544, "step": 25456 }, { "epoch": 0.8991871569921718, "grad_norm": 1.755331039428711, "learning_rate": 2.6415170695227477e-07, "loss": 0.7434, "step": 25457 }, { "epoch": 0.8992224787958797, "grad_norm": 1.6907292604446411, "learning_rate": 2.639682755166956e-07, "loss": 0.7439, "step": 25458 }, { "epoch": 0.8992578005995876, "grad_norm": 1.8219956159591675, "learning_rate": 2.6378490606540905e-07, "loss": 0.7488, "step": 25459 }, { "epoch": 0.8992931224032955, "grad_norm": 1.6567845344543457, "learning_rate": 2.636015986008156e-07, "loss": 0.7832, "step": 25460 }, { "epoch": 0.8993284442070034, "grad_norm": 2.6059329509735107, "learning_rate": 2.6341835312531314e-07, "loss": 0.7802, "step": 25461 }, { "epoch": 0.8993637660107113, "grad_norm": 1.6491276025772095, "learning_rate": 2.632351696413016e-07, "loss": 0.7549, "step": 25462 }, { "epoch": 0.8993990878144192, "grad_norm": 0.8326572179794312, "learning_rate": 2.6305204815117726e-07, "loss": 0.575, "step": 25463 }, { "epoch": 0.8994344096181272, "grad_norm": 1.813415288925171, "learning_rate": 2.628689886573377e-07, "loss": 0.7462, "step": 25464 }, { "epoch": 0.8994697314218351, "grad_norm": 1.580117106437683, "learning_rate": 2.6268599116217816e-07, "loss": 0.7847, "step": 25465 }, { "epoch": 0.899505053225543, "grad_norm": 1.6111397743225098, "learning_rate": 2.625030556680935e-07, "loss": 0.7394, "step": 25466 }, { "epoch": 0.8995403750292509, "grad_norm": 1.98859703540802, "learning_rate": 2.6232018217747956e-07, "loss": 0.7596, "step": 25467 }, { "epoch": 0.8995756968329588, "grad_norm": 1.8815996646881104, "learning_rate": 2.6213737069272713e-07, "loss": 0.7897, "step": 25468 }, { "epoch": 0.8996110186366667, "grad_norm": 1.9750210046768188, "learning_rate": 2.619546212162305e-07, "loss": 0.7453, "step": 25469 }, { "epoch": 0.8996463404403746, "grad_norm": 1.7428056001663208, "learning_rate": 2.617719337503816e-07, "loss": 0.7928, "step": 25470 }, { "epoch": 0.8996816622440825, "grad_norm": 2.1406466960906982, "learning_rate": 2.615893082975707e-07, "loss": 0.7385, "step": 25471 }, { "epoch": 0.8997169840477904, "grad_norm": 1.6605693101882935, "learning_rate": 2.614067448601876e-07, "loss": 0.7381, "step": 25472 }, { "epoch": 0.8997523058514983, "grad_norm": 1.8207532167434692, "learning_rate": 2.6122424344062367e-07, "loss": 0.753, "step": 25473 }, { "epoch": 0.8997876276552063, "grad_norm": 1.6388096809387207, "learning_rate": 2.610418040412649e-07, "loss": 0.752, "step": 25474 }, { "epoch": 0.8998229494589142, "grad_norm": 1.7187459468841553, "learning_rate": 2.6085942666450037e-07, "loss": 0.788, "step": 25475 }, { "epoch": 0.8998582712626221, "grad_norm": 1.7645204067230225, "learning_rate": 2.6067711131271713e-07, "loss": 0.8169, "step": 25476 }, { "epoch": 0.8998935930663299, "grad_norm": 2.016062021255493, "learning_rate": 2.60494857988301e-07, "loss": 0.7711, "step": 25477 }, { "epoch": 0.8999289148700378, "grad_norm": 1.559095859527588, "learning_rate": 2.603126666936367e-07, "loss": 0.7484, "step": 25478 }, { "epoch": 0.8999642366737457, "grad_norm": 1.6964478492736816, "learning_rate": 2.6013053743110973e-07, "loss": 0.7398, "step": 25479 }, { "epoch": 0.8999995584774536, "grad_norm": 1.7809133529663086, "learning_rate": 2.599484702031035e-07, "loss": 0.743, "step": 25480 }, { "epoch": 0.9000348802811615, "grad_norm": 1.956624984741211, "learning_rate": 2.5976646501199964e-07, "loss": 0.7708, "step": 25481 }, { "epoch": 0.9000702020848694, "grad_norm": 1.7279020547866821, "learning_rate": 2.595845218601817e-07, "loss": 0.7787, "step": 25482 }, { "epoch": 0.9001055238885773, "grad_norm": 1.6113413572311401, "learning_rate": 2.5940264075003105e-07, "loss": 0.7914, "step": 25483 }, { "epoch": 0.9001408456922853, "grad_norm": 1.8188925981521606, "learning_rate": 2.5922082168392704e-07, "loss": 0.746, "step": 25484 }, { "epoch": 0.9001761674959932, "grad_norm": 1.6014550924301147, "learning_rate": 2.5903906466424934e-07, "loss": 0.7572, "step": 25485 }, { "epoch": 0.9002114892997011, "grad_norm": 1.587570309638977, "learning_rate": 2.588573696933777e-07, "loss": 0.7474, "step": 25486 }, { "epoch": 0.900246811103409, "grad_norm": 1.8396289348602295, "learning_rate": 2.586757367736892e-07, "loss": 0.758, "step": 25487 }, { "epoch": 0.9002821329071169, "grad_norm": 1.7058430910110474, "learning_rate": 2.584941659075618e-07, "loss": 0.7898, "step": 25488 }, { "epoch": 0.9003174547108248, "grad_norm": 1.5163116455078125, "learning_rate": 2.583126570973715e-07, "loss": 0.7257, "step": 25489 }, { "epoch": 0.9003527765145327, "grad_norm": 1.6597342491149902, "learning_rate": 2.5813121034549303e-07, "loss": 0.7789, "step": 25490 }, { "epoch": 0.9003880983182406, "grad_norm": 1.794512391090393, "learning_rate": 2.5794982565430225e-07, "loss": 0.7511, "step": 25491 }, { "epoch": 0.9004234201219485, "grad_norm": 1.887107014656067, "learning_rate": 2.577685030261734e-07, "loss": 0.7591, "step": 25492 }, { "epoch": 0.9004587419256564, "grad_norm": 1.7149778604507446, "learning_rate": 2.5758724246347845e-07, "loss": 0.8002, "step": 25493 }, { "epoch": 0.9004940637293644, "grad_norm": 2.1881682872772217, "learning_rate": 2.574060439685899e-07, "loss": 0.7882, "step": 25494 }, { "epoch": 0.9005293855330723, "grad_norm": 1.6358520984649658, "learning_rate": 2.572249075438804e-07, "loss": 0.7742, "step": 25495 }, { "epoch": 0.9005647073367802, "grad_norm": 1.6783661842346191, "learning_rate": 2.5704383319171967e-07, "loss": 0.7333, "step": 25496 }, { "epoch": 0.9006000291404881, "grad_norm": 1.4849638938903809, "learning_rate": 2.568628209144769e-07, "loss": 0.7469, "step": 25497 }, { "epoch": 0.900635350944196, "grad_norm": 1.6130000352859497, "learning_rate": 2.566818707145219e-07, "loss": 0.7766, "step": 25498 }, { "epoch": 0.9006706727479039, "grad_norm": 1.8520427942276, "learning_rate": 2.5650098259422386e-07, "loss": 0.7802, "step": 25499 }, { "epoch": 0.9007059945516118, "grad_norm": 1.614797830581665, "learning_rate": 2.563201565559481e-07, "loss": 0.7338, "step": 25500 }, { "epoch": 0.9007413163553197, "grad_norm": 1.7719721794128418, "learning_rate": 2.561393926020628e-07, "loss": 0.7696, "step": 25501 }, { "epoch": 0.9007766381590276, "grad_norm": 1.8156378269195557, "learning_rate": 2.559586907349343e-07, "loss": 0.7666, "step": 25502 }, { "epoch": 0.9008119599627354, "grad_norm": 2.083491563796997, "learning_rate": 2.557780509569258e-07, "loss": 0.7632, "step": 25503 }, { "epoch": 0.9008472817664434, "grad_norm": 1.8097715377807617, "learning_rate": 2.5559747327040196e-07, "loss": 0.7399, "step": 25504 }, { "epoch": 0.9008826035701513, "grad_norm": 1.627797245979309, "learning_rate": 2.5541695767772765e-07, "loss": 0.7572, "step": 25505 }, { "epoch": 0.9009179253738592, "grad_norm": 1.6695613861083984, "learning_rate": 2.5523650418126375e-07, "loss": 0.7549, "step": 25506 }, { "epoch": 0.9009532471775671, "grad_norm": 1.8480371236801147, "learning_rate": 2.5505611278337227e-07, "loss": 0.7681, "step": 25507 }, { "epoch": 0.900988568981275, "grad_norm": 1.7040276527404785, "learning_rate": 2.5487578348641515e-07, "loss": 0.775, "step": 25508 }, { "epoch": 0.9010238907849829, "grad_norm": 2.266646146774292, "learning_rate": 2.546955162927511e-07, "loss": 0.7714, "step": 25509 }, { "epoch": 0.9010592125886908, "grad_norm": 1.7943217754364014, "learning_rate": 2.5451531120473995e-07, "loss": 0.7754, "step": 25510 }, { "epoch": 0.9010945343923987, "grad_norm": 2.328674077987671, "learning_rate": 2.543351682247408e-07, "loss": 0.7339, "step": 25511 }, { "epoch": 0.9011298561961066, "grad_norm": 1.6896418333053589, "learning_rate": 2.5415508735511127e-07, "loss": 0.7499, "step": 25512 }, { "epoch": 0.9011651779998145, "grad_norm": 1.9721585512161255, "learning_rate": 2.539750685982073e-07, "loss": 0.7728, "step": 25513 }, { "epoch": 0.9012004998035225, "grad_norm": 1.8773771524429321, "learning_rate": 2.5379511195638586e-07, "loss": 0.8008, "step": 25514 }, { "epoch": 0.9012358216072304, "grad_norm": 1.725921392440796, "learning_rate": 2.5361521743200224e-07, "loss": 0.7656, "step": 25515 }, { "epoch": 0.9012711434109383, "grad_norm": 1.6103464365005493, "learning_rate": 2.5343538502740905e-07, "loss": 0.7395, "step": 25516 }, { "epoch": 0.9013064652146462, "grad_norm": 1.7007414102554321, "learning_rate": 2.532556147449622e-07, "loss": 0.7655, "step": 25517 }, { "epoch": 0.9013417870183541, "grad_norm": 1.6789803504943848, "learning_rate": 2.5307590658701364e-07, "loss": 0.7677, "step": 25518 }, { "epoch": 0.901377108822062, "grad_norm": 1.8362971544265747, "learning_rate": 2.5289626055591485e-07, "loss": 0.7624, "step": 25519 }, { "epoch": 0.9014124306257699, "grad_norm": 1.5393714904785156, "learning_rate": 2.5271667665401734e-07, "loss": 0.7329, "step": 25520 }, { "epoch": 0.9014477524294778, "grad_norm": 1.6698863506317139, "learning_rate": 2.5253715488367137e-07, "loss": 0.7727, "step": 25521 }, { "epoch": 0.9014830742331857, "grad_norm": 1.7850006818771362, "learning_rate": 2.5235769524722787e-07, "loss": 0.757, "step": 25522 }, { "epoch": 0.9015183960368937, "grad_norm": 1.7512297630310059, "learning_rate": 2.521782977470333e-07, "loss": 0.7466, "step": 25523 }, { "epoch": 0.9015537178406016, "grad_norm": 1.6680822372436523, "learning_rate": 2.5199896238543686e-07, "loss": 0.7883, "step": 25524 }, { "epoch": 0.9015890396443095, "grad_norm": 1.9935542345046997, "learning_rate": 2.5181968916478615e-07, "loss": 0.7743, "step": 25525 }, { "epoch": 0.9016243614480174, "grad_norm": 1.6515209674835205, "learning_rate": 2.5164047808742596e-07, "loss": 0.7803, "step": 25526 }, { "epoch": 0.9016596832517253, "grad_norm": 1.6765644550323486, "learning_rate": 2.5146132915570277e-07, "loss": 0.7813, "step": 25527 }, { "epoch": 0.9016950050554332, "grad_norm": 1.661728024482727, "learning_rate": 2.5128224237196186e-07, "loss": 0.7748, "step": 25528 }, { "epoch": 0.901730326859141, "grad_norm": 1.611941933631897, "learning_rate": 2.5110321773854527e-07, "loss": 0.7504, "step": 25529 }, { "epoch": 0.9017656486628489, "grad_norm": 1.966794490814209, "learning_rate": 2.509242552577973e-07, "loss": 0.768, "step": 25530 }, { "epoch": 0.9018009704665568, "grad_norm": 1.6011847257614136, "learning_rate": 2.507453549320604e-07, "loss": 0.741, "step": 25531 }, { "epoch": 0.9018362922702647, "grad_norm": 1.743091344833374, "learning_rate": 2.505665167636756e-07, "loss": 0.7694, "step": 25532 }, { "epoch": 0.9018716140739726, "grad_norm": 1.713775396347046, "learning_rate": 2.503877407549832e-07, "loss": 0.7426, "step": 25533 }, { "epoch": 0.9019069358776806, "grad_norm": 1.63323974609375, "learning_rate": 2.5020902690832296e-07, "loss": 0.7455, "step": 25534 }, { "epoch": 0.9019422576813885, "grad_norm": 2.1445906162261963, "learning_rate": 2.5003037522603467e-07, "loss": 0.7078, "step": 25535 }, { "epoch": 0.9019775794850964, "grad_norm": 1.6280027627944946, "learning_rate": 2.4985178571045544e-07, "loss": 0.7645, "step": 25536 }, { "epoch": 0.9020129012888043, "grad_norm": 1.671552300453186, "learning_rate": 2.496732583639228e-07, "loss": 0.7406, "step": 25537 }, { "epoch": 0.9020482230925122, "grad_norm": 1.7494479417800903, "learning_rate": 2.494947931887742e-07, "loss": 0.7826, "step": 25538 }, { "epoch": 0.9020835448962201, "grad_norm": 1.5656696557998657, "learning_rate": 2.493163901873447e-07, "loss": 0.7599, "step": 25539 }, { "epoch": 0.902118866699928, "grad_norm": 1.7358551025390625, "learning_rate": 2.491380493619688e-07, "loss": 0.7962, "step": 25540 }, { "epoch": 0.9021541885036359, "grad_norm": 1.740509271621704, "learning_rate": 2.4895977071498156e-07, "loss": 0.75, "step": 25541 }, { "epoch": 0.9021895103073438, "grad_norm": 1.945914626121521, "learning_rate": 2.487815542487154e-07, "loss": 0.7884, "step": 25542 }, { "epoch": 0.9022248321110518, "grad_norm": 1.6860135793685913, "learning_rate": 2.48603399965503e-07, "loss": 0.7721, "step": 25543 }, { "epoch": 0.9022601539147597, "grad_norm": 1.6814920902252197, "learning_rate": 2.484253078676768e-07, "loss": 0.7333, "step": 25544 }, { "epoch": 0.9022954757184676, "grad_norm": 1.6818705797195435, "learning_rate": 2.482472779575662e-07, "loss": 0.7371, "step": 25545 }, { "epoch": 0.9023307975221755, "grad_norm": 1.6625686883926392, "learning_rate": 2.480693102375026e-07, "loss": 0.7719, "step": 25546 }, { "epoch": 0.9023661193258834, "grad_norm": 1.5430433750152588, "learning_rate": 2.4789140470981466e-07, "loss": 0.7098, "step": 25547 }, { "epoch": 0.9024014411295913, "grad_norm": 1.7303777933120728, "learning_rate": 2.4771356137683e-07, "loss": 0.7119, "step": 25548 }, { "epoch": 0.9024367629332992, "grad_norm": 1.6198819875717163, "learning_rate": 2.475357802408773e-07, "loss": 0.7428, "step": 25549 }, { "epoch": 0.9024720847370071, "grad_norm": 1.7606070041656494, "learning_rate": 2.473580613042831e-07, "loss": 0.7643, "step": 25550 }, { "epoch": 0.902507406540715, "grad_norm": 1.8171266317367554, "learning_rate": 2.4718040456937375e-07, "loss": 0.7451, "step": 25551 }, { "epoch": 0.902542728344423, "grad_norm": 1.7761530876159668, "learning_rate": 2.4700281003847303e-07, "loss": 0.7536, "step": 25552 }, { "epoch": 0.9025780501481309, "grad_norm": 1.4970998764038086, "learning_rate": 2.4682527771390575e-07, "loss": 0.7713, "step": 25553 }, { "epoch": 0.9026133719518388, "grad_norm": 1.8067963123321533, "learning_rate": 2.4664780759799667e-07, "loss": 0.777, "step": 25554 }, { "epoch": 0.9026486937555466, "grad_norm": 1.6612259149551392, "learning_rate": 2.4647039969306674e-07, "loss": 0.751, "step": 25555 }, { "epoch": 0.9026840155592545, "grad_norm": 1.7440464496612549, "learning_rate": 2.462930540014391e-07, "loss": 0.7623, "step": 25556 }, { "epoch": 0.9027193373629624, "grad_norm": 1.7271509170532227, "learning_rate": 2.4611577052543465e-07, "loss": 0.7714, "step": 25557 }, { "epoch": 0.9027546591666703, "grad_norm": 1.6010899543762207, "learning_rate": 2.459385492673727e-07, "loss": 0.7646, "step": 25558 }, { "epoch": 0.9027899809703782, "grad_norm": 1.5773893594741821, "learning_rate": 2.45761390229573e-07, "loss": 0.7508, "step": 25559 }, { "epoch": 0.9028253027740861, "grad_norm": 1.8912549018859863, "learning_rate": 2.45584293414356e-07, "loss": 0.7417, "step": 25560 }, { "epoch": 0.902860624577794, "grad_norm": 1.8144540786743164, "learning_rate": 2.4540725882403693e-07, "loss": 0.7385, "step": 25561 }, { "epoch": 0.902895946381502, "grad_norm": 1.684366226196289, "learning_rate": 2.452302864609335e-07, "loss": 0.7531, "step": 25562 }, { "epoch": 0.9029312681852099, "grad_norm": 1.6151429414749146, "learning_rate": 2.450533763273638e-07, "loss": 0.7485, "step": 25563 }, { "epoch": 0.9029665899889178, "grad_norm": 1.6651118993759155, "learning_rate": 2.4487652842564047e-07, "loss": 0.7168, "step": 25564 }, { "epoch": 0.9030019117926257, "grad_norm": 1.6956596374511719, "learning_rate": 2.446997427580794e-07, "loss": 0.7591, "step": 25565 }, { "epoch": 0.9030372335963336, "grad_norm": 1.7535468339920044, "learning_rate": 2.4452301932699486e-07, "loss": 0.7779, "step": 25566 }, { "epoch": 0.9030725554000415, "grad_norm": 1.8296946287155151, "learning_rate": 2.443463581346983e-07, "loss": 0.7448, "step": 25567 }, { "epoch": 0.9031078772037494, "grad_norm": 1.5430949926376343, "learning_rate": 2.441697591835024e-07, "loss": 0.7677, "step": 25568 }, { "epoch": 0.9031431990074573, "grad_norm": 1.6266945600509644, "learning_rate": 2.439932224757202e-07, "loss": 0.7557, "step": 25569 }, { "epoch": 0.9031785208111652, "grad_norm": 1.6263971328735352, "learning_rate": 2.4381674801365997e-07, "loss": 0.7674, "step": 25570 }, { "epoch": 0.9032138426148731, "grad_norm": 2.0059192180633545, "learning_rate": 2.436403357996314e-07, "loss": 0.7586, "step": 25571 }, { "epoch": 0.903249164418581, "grad_norm": 1.6676467657089233, "learning_rate": 2.434639858359444e-07, "loss": 0.7471, "step": 25572 }, { "epoch": 0.903284486222289, "grad_norm": 1.8365201950073242, "learning_rate": 2.4328769812490715e-07, "loss": 0.7649, "step": 25573 }, { "epoch": 0.9033198080259969, "grad_norm": 1.6645821332931519, "learning_rate": 2.4311147266882553e-07, "loss": 0.7522, "step": 25574 }, { "epoch": 0.9033551298297048, "grad_norm": 1.65433669090271, "learning_rate": 2.429353094700071e-07, "loss": 0.7741, "step": 25575 }, { "epoch": 0.9033904516334127, "grad_norm": 1.6717034578323364, "learning_rate": 2.427592085307573e-07, "loss": 0.7803, "step": 25576 }, { "epoch": 0.9034257734371206, "grad_norm": 1.5589046478271484, "learning_rate": 2.4258316985338035e-07, "loss": 0.7307, "step": 25577 }, { "epoch": 0.9034610952408285, "grad_norm": 2.0298290252685547, "learning_rate": 2.424071934401806e-07, "loss": 0.7655, "step": 25578 }, { "epoch": 0.9034964170445364, "grad_norm": 1.684018850326538, "learning_rate": 2.4223127929346113e-07, "loss": 0.7768, "step": 25579 }, { "epoch": 0.9035317388482443, "grad_norm": 1.6272636651992798, "learning_rate": 2.420554274155251e-07, "loss": 0.7606, "step": 25580 }, { "epoch": 0.9035670606519522, "grad_norm": 1.9575717449188232, "learning_rate": 2.418796378086724e-07, "loss": 0.7707, "step": 25581 }, { "epoch": 0.90360238245566, "grad_norm": 1.580280065536499, "learning_rate": 2.417039104752045e-07, "loss": 0.7403, "step": 25582 }, { "epoch": 0.903637704259368, "grad_norm": 1.7136012315750122, "learning_rate": 2.4152824541742224e-07, "loss": 0.7529, "step": 25583 }, { "epoch": 0.9036730260630759, "grad_norm": 1.7467902898788452, "learning_rate": 2.413526426376228e-07, "loss": 0.8028, "step": 25584 }, { "epoch": 0.9037083478667838, "grad_norm": 1.7130047082901, "learning_rate": 2.41177102138106e-07, "loss": 0.7615, "step": 25585 }, { "epoch": 0.9037436696704917, "grad_norm": 1.9239609241485596, "learning_rate": 2.410016239211688e-07, "loss": 0.7403, "step": 25586 }, { "epoch": 0.9037789914741996, "grad_norm": 1.5896414518356323, "learning_rate": 2.408262079891077e-07, "loss": 0.7575, "step": 25587 }, { "epoch": 0.9038143132779075, "grad_norm": 2.262805938720703, "learning_rate": 2.4065085434421873e-07, "loss": 0.7272, "step": 25588 }, { "epoch": 0.9038496350816154, "grad_norm": 1.9950584173202515, "learning_rate": 2.4047556298879613e-07, "loss": 0.7451, "step": 25589 }, { "epoch": 0.9038849568853233, "grad_norm": 1.6489779949188232, "learning_rate": 2.403003339251353e-07, "loss": 0.7389, "step": 25590 }, { "epoch": 0.9039202786890312, "grad_norm": 1.7266358137130737, "learning_rate": 2.401251671555288e-07, "loss": 0.7504, "step": 25591 }, { "epoch": 0.9039556004927392, "grad_norm": 1.697653889656067, "learning_rate": 2.399500626822687e-07, "loss": 0.7644, "step": 25592 }, { "epoch": 0.9039909222964471, "grad_norm": 1.7448749542236328, "learning_rate": 2.397750205076482e-07, "loss": 0.7466, "step": 25593 }, { "epoch": 0.904026244100155, "grad_norm": 1.8909419775009155, "learning_rate": 2.3960004063395714e-07, "loss": 0.7632, "step": 25594 }, { "epoch": 0.9040615659038629, "grad_norm": 1.7305703163146973, "learning_rate": 2.394251230634853e-07, "loss": 0.7601, "step": 25595 }, { "epoch": 0.9040968877075708, "grad_norm": 1.6564830541610718, "learning_rate": 2.392502677985237e-07, "loss": 0.7311, "step": 25596 }, { "epoch": 0.9041322095112787, "grad_norm": 1.9702256917953491, "learning_rate": 2.390754748413587e-07, "loss": 0.7964, "step": 25597 }, { "epoch": 0.9041675313149866, "grad_norm": 1.7030444145202637, "learning_rate": 2.3890074419427924e-07, "loss": 0.7832, "step": 25598 }, { "epoch": 0.9042028531186945, "grad_norm": 1.7674779891967773, "learning_rate": 2.387260758595722e-07, "loss": 0.7367, "step": 25599 }, { "epoch": 0.9042381749224024, "grad_norm": 1.5137085914611816, "learning_rate": 2.3855146983952306e-07, "loss": 0.755, "step": 25600 }, { "epoch": 0.9042734967261103, "grad_norm": 1.8822628259658813, "learning_rate": 2.383769261364166e-07, "loss": 0.7797, "step": 25601 }, { "epoch": 0.9043088185298183, "grad_norm": 1.670079231262207, "learning_rate": 2.3820244475253885e-07, "loss": 0.7349, "step": 25602 }, { "epoch": 0.9043441403335262, "grad_norm": 1.730533242225647, "learning_rate": 2.380280256901718e-07, "loss": 0.7303, "step": 25603 }, { "epoch": 0.9043794621372341, "grad_norm": 1.6210695505142212, "learning_rate": 2.3785366895159868e-07, "loss": 0.7668, "step": 25604 }, { "epoch": 0.904414783940942, "grad_norm": 1.7491850852966309, "learning_rate": 2.3767937453910206e-07, "loss": 0.7665, "step": 25605 }, { "epoch": 0.9044501057446499, "grad_norm": 1.9909294843673706, "learning_rate": 2.375051424549629e-07, "loss": 0.752, "step": 25606 }, { "epoch": 0.9044854275483578, "grad_norm": 1.8852598667144775, "learning_rate": 2.3733097270146054e-07, "loss": 0.7641, "step": 25607 }, { "epoch": 0.9045207493520656, "grad_norm": 1.7309370040893555, "learning_rate": 2.3715686528087477e-07, "loss": 0.7683, "step": 25608 }, { "epoch": 0.9045560711557735, "grad_norm": 0.8348608613014221, "learning_rate": 2.3698282019548547e-07, "loss": 0.5568, "step": 25609 }, { "epoch": 0.9045913929594814, "grad_norm": 1.9783936738967896, "learning_rate": 2.3680883744756912e-07, "loss": 0.7473, "step": 25610 }, { "epoch": 0.9046267147631893, "grad_norm": 1.724407434463501, "learning_rate": 2.3663491703940333e-07, "loss": 0.7609, "step": 25611 }, { "epoch": 0.9046620365668973, "grad_norm": 1.7059056758880615, "learning_rate": 2.3646105897326467e-07, "loss": 0.7456, "step": 25612 }, { "epoch": 0.9046973583706052, "grad_norm": 1.7327004671096802, "learning_rate": 2.3628726325142792e-07, "loss": 0.7611, "step": 25613 }, { "epoch": 0.9047326801743131, "grad_norm": 1.5813319683074951, "learning_rate": 2.3611352987616797e-07, "loss": 0.778, "step": 25614 }, { "epoch": 0.904768001978021, "grad_norm": 1.7330294847488403, "learning_rate": 2.3593985884975912e-07, "loss": 0.7531, "step": 25615 }, { "epoch": 0.9048033237817289, "grad_norm": 1.7767750024795532, "learning_rate": 2.3576625017447285e-07, "loss": 0.7487, "step": 25616 }, { "epoch": 0.9048386455854368, "grad_norm": 1.6759730577468872, "learning_rate": 2.3559270385258294e-07, "loss": 0.7441, "step": 25617 }, { "epoch": 0.9048739673891447, "grad_norm": 1.8382139205932617, "learning_rate": 2.354192198863603e-07, "loss": 0.8122, "step": 25618 }, { "epoch": 0.9049092891928526, "grad_norm": 1.8841722011566162, "learning_rate": 2.352457982780748e-07, "loss": 0.7806, "step": 25619 }, { "epoch": 0.9049446109965605, "grad_norm": 1.754786729812622, "learning_rate": 2.3507243902999688e-07, "loss": 0.788, "step": 25620 }, { "epoch": 0.9049799328002684, "grad_norm": 1.602760910987854, "learning_rate": 2.3489914214439525e-07, "loss": 0.751, "step": 25621 }, { "epoch": 0.9050152546039764, "grad_norm": 1.710684895515442, "learning_rate": 2.3472590762353808e-07, "loss": 0.7462, "step": 25622 }, { "epoch": 0.9050505764076843, "grad_norm": 2.0987796783447266, "learning_rate": 2.3455273546969193e-07, "loss": 0.7952, "step": 25623 }, { "epoch": 0.9050858982113922, "grad_norm": 1.8062635660171509, "learning_rate": 2.343796256851244e-07, "loss": 0.7774, "step": 25624 }, { "epoch": 0.9051212200151001, "grad_norm": 1.8025352954864502, "learning_rate": 2.3420657827210037e-07, "loss": 0.7027, "step": 25625 }, { "epoch": 0.905156541818808, "grad_norm": 1.8387670516967773, "learning_rate": 2.3403359323288466e-07, "loss": 0.77, "step": 25626 }, { "epoch": 0.9051918636225159, "grad_norm": 1.8232349157333374, "learning_rate": 2.3386067056974049e-07, "loss": 0.7344, "step": 25627 }, { "epoch": 0.9052271854262238, "grad_norm": 2.0464086532592773, "learning_rate": 2.3368781028493325e-07, "loss": 0.7754, "step": 25628 }, { "epoch": 0.9052625072299317, "grad_norm": 1.66581130027771, "learning_rate": 2.335150123807234e-07, "loss": 0.7734, "step": 25629 }, { "epoch": 0.9052978290336396, "grad_norm": 1.623263955116272, "learning_rate": 2.3334227685937294e-07, "loss": 0.7411, "step": 25630 }, { "epoch": 0.9053331508373476, "grad_norm": 0.8840562105178833, "learning_rate": 2.331696037231429e-07, "loss": 0.566, "step": 25631 }, { "epoch": 0.9053684726410555, "grad_norm": 1.7262691259384155, "learning_rate": 2.329969929742931e-07, "loss": 0.7951, "step": 25632 }, { "epoch": 0.9054037944447634, "grad_norm": 1.6208319664001465, "learning_rate": 2.3282444461508236e-07, "loss": 0.7709, "step": 25633 }, { "epoch": 0.9054391162484712, "grad_norm": 1.8173489570617676, "learning_rate": 2.3265195864776934e-07, "loss": 0.7532, "step": 25634 }, { "epoch": 0.9054744380521791, "grad_norm": 1.7594304084777832, "learning_rate": 2.324795350746112e-07, "loss": 0.7671, "step": 25635 }, { "epoch": 0.905509759855887, "grad_norm": 1.6466522216796875, "learning_rate": 2.3230717389786438e-07, "loss": 0.7611, "step": 25636 }, { "epoch": 0.9055450816595949, "grad_norm": 1.5901217460632324, "learning_rate": 2.3213487511978494e-07, "loss": 0.7381, "step": 25637 }, { "epoch": 0.9055804034633028, "grad_norm": 1.6318713426589966, "learning_rate": 2.3196263874262937e-07, "loss": 0.7201, "step": 25638 }, { "epoch": 0.9056157252670107, "grad_norm": 1.6476402282714844, "learning_rate": 2.3179046476864918e-07, "loss": 0.7778, "step": 25639 }, { "epoch": 0.9056510470707186, "grad_norm": 1.7150688171386719, "learning_rate": 2.316183532000993e-07, "loss": 0.7584, "step": 25640 }, { "epoch": 0.9056863688744266, "grad_norm": 1.6160486936569214, "learning_rate": 2.314463040392323e-07, "loss": 0.7885, "step": 25641 }, { "epoch": 0.9057216906781345, "grad_norm": 1.8921924829483032, "learning_rate": 2.3127431728829976e-07, "loss": 0.7716, "step": 25642 }, { "epoch": 0.9057570124818424, "grad_norm": 1.7526729106903076, "learning_rate": 2.3110239294955262e-07, "loss": 0.781, "step": 25643 }, { "epoch": 0.9057923342855503, "grad_norm": 1.5119904279708862, "learning_rate": 2.3093053102524133e-07, "loss": 0.7529, "step": 25644 }, { "epoch": 0.9058276560892582, "grad_norm": 1.7879618406295776, "learning_rate": 2.307587315176135e-07, "loss": 0.7935, "step": 25645 }, { "epoch": 0.9058629778929661, "grad_norm": 1.7240185737609863, "learning_rate": 2.3058699442891962e-07, "loss": 0.7494, "step": 25646 }, { "epoch": 0.905898299696674, "grad_norm": 1.7032725811004639, "learning_rate": 2.3041531976140618e-07, "loss": 0.7686, "step": 25647 }, { "epoch": 0.9059336215003819, "grad_norm": 1.7345871925354004, "learning_rate": 2.3024370751732085e-07, "loss": 0.7528, "step": 25648 }, { "epoch": 0.9059689433040898, "grad_norm": 1.620248794555664, "learning_rate": 2.30072157698909e-07, "loss": 0.7604, "step": 25649 }, { "epoch": 0.9060042651077977, "grad_norm": 1.6713075637817383, "learning_rate": 2.2990067030841611e-07, "loss": 0.7174, "step": 25650 }, { "epoch": 0.9060395869115057, "grad_norm": 2.139491558074951, "learning_rate": 2.2972924534808705e-07, "loss": 0.7803, "step": 25651 }, { "epoch": 0.9060749087152136, "grad_norm": 2.0491445064544678, "learning_rate": 2.2955788282016445e-07, "loss": 0.7546, "step": 25652 }, { "epoch": 0.9061102305189215, "grad_norm": 1.7266254425048828, "learning_rate": 2.2938658272689097e-07, "loss": 0.7697, "step": 25653 }, { "epoch": 0.9061455523226294, "grad_norm": 1.764219045639038, "learning_rate": 2.2921534507051034e-07, "loss": 0.8036, "step": 25654 }, { "epoch": 0.9061808741263373, "grad_norm": 1.7777791023254395, "learning_rate": 2.2904416985326195e-07, "loss": 0.7878, "step": 25655 }, { "epoch": 0.9062161959300452, "grad_norm": 1.6244248151779175, "learning_rate": 2.2887305707738616e-07, "loss": 0.7534, "step": 25656 }, { "epoch": 0.9062515177337531, "grad_norm": 1.77886962890625, "learning_rate": 2.28702006745124e-07, "loss": 0.757, "step": 25657 }, { "epoch": 0.906286839537461, "grad_norm": 1.7559576034545898, "learning_rate": 2.2853101885871198e-07, "loss": 0.7715, "step": 25658 }, { "epoch": 0.9063221613411689, "grad_norm": 1.7874618768692017, "learning_rate": 2.2836009342038945e-07, "loss": 0.7799, "step": 25659 }, { "epoch": 0.9063574831448767, "grad_norm": 1.647371768951416, "learning_rate": 2.281892304323935e-07, "loss": 0.7441, "step": 25660 }, { "epoch": 0.9063928049485847, "grad_norm": 4.123466968536377, "learning_rate": 2.2801842989695955e-07, "loss": 0.789, "step": 25661 }, { "epoch": 0.9064281267522926, "grad_norm": 1.5978326797485352, "learning_rate": 2.2784769181632303e-07, "loss": 0.7617, "step": 25662 }, { "epoch": 0.9064634485560005, "grad_norm": 1.6453266143798828, "learning_rate": 2.2767701619271888e-07, "loss": 0.7357, "step": 25663 }, { "epoch": 0.9064987703597084, "grad_norm": 1.713841438293457, "learning_rate": 2.2750640302838188e-07, "loss": 0.785, "step": 25664 }, { "epoch": 0.9065340921634163, "grad_norm": 2.051224946975708, "learning_rate": 2.2733585232554257e-07, "loss": 0.7538, "step": 25665 }, { "epoch": 0.9065694139671242, "grad_norm": 1.600095510482788, "learning_rate": 2.271653640864352e-07, "loss": 0.7613, "step": 25666 }, { "epoch": 0.9066047357708321, "grad_norm": 1.7119035720825195, "learning_rate": 2.269949383132902e-07, "loss": 0.771, "step": 25667 }, { "epoch": 0.90664005757454, "grad_norm": 1.579006552696228, "learning_rate": 2.268245750083381e-07, "loss": 0.7489, "step": 25668 }, { "epoch": 0.9066753793782479, "grad_norm": 0.9857734441757202, "learning_rate": 2.266542741738087e-07, "loss": 0.5927, "step": 25669 }, { "epoch": 0.9067107011819558, "grad_norm": 1.8312450647354126, "learning_rate": 2.2648403581193135e-07, "loss": 0.779, "step": 25670 }, { "epoch": 0.9067460229856638, "grad_norm": 1.825264811515808, "learning_rate": 2.2631385992493315e-07, "loss": 0.7622, "step": 25671 }, { "epoch": 0.9067813447893717, "grad_norm": 1.7472195625305176, "learning_rate": 2.2614374651504124e-07, "loss": 0.7647, "step": 25672 }, { "epoch": 0.9068166665930796, "grad_norm": 1.8167105913162231, "learning_rate": 2.259736955844838e-07, "loss": 0.7834, "step": 25673 }, { "epoch": 0.9068519883967875, "grad_norm": 1.7916202545166016, "learning_rate": 2.258037071354846e-07, "loss": 0.7631, "step": 25674 }, { "epoch": 0.9068873102004954, "grad_norm": 1.6237813234329224, "learning_rate": 2.2563378117026857e-07, "loss": 0.7763, "step": 25675 }, { "epoch": 0.9069226320042033, "grad_norm": 1.728555679321289, "learning_rate": 2.2546391769106112e-07, "loss": 0.7327, "step": 25676 }, { "epoch": 0.9069579538079112, "grad_norm": 1.7794530391693115, "learning_rate": 2.2529411670008382e-07, "loss": 0.7533, "step": 25677 }, { "epoch": 0.9069932756116191, "grad_norm": 2.8734774589538574, "learning_rate": 2.2512437819955934e-07, "loss": 0.7614, "step": 25678 }, { "epoch": 0.907028597415327, "grad_norm": 1.6047710180282593, "learning_rate": 2.2495470219171034e-07, "loss": 0.7592, "step": 25679 }, { "epoch": 0.907063919219035, "grad_norm": 1.8865678310394287, "learning_rate": 2.2478508867875615e-07, "loss": 0.7567, "step": 25680 }, { "epoch": 0.9070992410227429, "grad_norm": 1.6879369020462036, "learning_rate": 2.2461553766291667e-07, "loss": 0.7281, "step": 25681 }, { "epoch": 0.9071345628264508, "grad_norm": 2.068572998046875, "learning_rate": 2.2444604914641178e-07, "loss": 0.7541, "step": 25682 }, { "epoch": 0.9071698846301587, "grad_norm": 1.6725618839263916, "learning_rate": 2.2427662313145914e-07, "loss": 0.7327, "step": 25683 }, { "epoch": 0.9072052064338666, "grad_norm": 1.5864276885986328, "learning_rate": 2.2410725962027645e-07, "loss": 0.7614, "step": 25684 }, { "epoch": 0.9072405282375745, "grad_norm": 1.4871900081634521, "learning_rate": 2.239379586150797e-07, "loss": 0.7485, "step": 25685 }, { "epoch": 0.9072758500412823, "grad_norm": 1.6128898859024048, "learning_rate": 2.2376872011808604e-07, "loss": 0.7184, "step": 25686 }, { "epoch": 0.9073111718449902, "grad_norm": 0.9569472670555115, "learning_rate": 2.2359954413150865e-07, "loss": 0.5759, "step": 25687 }, { "epoch": 0.9073464936486981, "grad_norm": 1.7803679704666138, "learning_rate": 2.2343043065756243e-07, "loss": 0.7349, "step": 25688 }, { "epoch": 0.907381815452406, "grad_norm": 1.8648377656936646, "learning_rate": 2.2326137969846173e-07, "loss": 0.7491, "step": 25689 }, { "epoch": 0.907417137256114, "grad_norm": 1.8215243816375732, "learning_rate": 2.2309239125641758e-07, "loss": 0.7716, "step": 25690 }, { "epoch": 0.9074524590598219, "grad_norm": 1.6862237453460693, "learning_rate": 2.2292346533364205e-07, "loss": 0.7506, "step": 25691 }, { "epoch": 0.9074877808635298, "grad_norm": 1.8053851127624512, "learning_rate": 2.2275460193234732e-07, "loss": 0.7643, "step": 25692 }, { "epoch": 0.9075231026672377, "grad_norm": 1.6905254125595093, "learning_rate": 2.2258580105474104e-07, "loss": 0.7689, "step": 25693 }, { "epoch": 0.9075584244709456, "grad_norm": 1.5664527416229248, "learning_rate": 2.224170627030342e-07, "loss": 0.7722, "step": 25694 }, { "epoch": 0.9075937462746535, "grad_norm": 1.7247467041015625, "learning_rate": 2.2224838687943452e-07, "loss": 0.7663, "step": 25695 }, { "epoch": 0.9076290680783614, "grad_norm": 1.682625651359558, "learning_rate": 2.2207977358615073e-07, "loss": 0.7902, "step": 25696 }, { "epoch": 0.9076643898820693, "grad_norm": 1.687303066253662, "learning_rate": 2.219112228253878e-07, "loss": 0.7544, "step": 25697 }, { "epoch": 0.9076997116857772, "grad_norm": 1.9645226001739502, "learning_rate": 2.2174273459935336e-07, "loss": 0.7664, "step": 25698 }, { "epoch": 0.9077350334894851, "grad_norm": 2.125614881515503, "learning_rate": 2.2157430891025178e-07, "loss": 0.7327, "step": 25699 }, { "epoch": 0.907770355293193, "grad_norm": 1.7440695762634277, "learning_rate": 2.2140594576028685e-07, "loss": 0.7766, "step": 25700 }, { "epoch": 0.907805677096901, "grad_norm": 1.9006742238998413, "learning_rate": 2.2123764515166236e-07, "loss": 0.7466, "step": 25701 }, { "epoch": 0.9078409989006089, "grad_norm": 1.9215563535690308, "learning_rate": 2.210694070865821e-07, "loss": 0.7951, "step": 25702 }, { "epoch": 0.9078763207043168, "grad_norm": 1.5671050548553467, "learning_rate": 2.2090123156724652e-07, "loss": 0.7477, "step": 25703 }, { "epoch": 0.9079116425080247, "grad_norm": 1.864024043083191, "learning_rate": 2.207331185958572e-07, "loss": 0.7755, "step": 25704 }, { "epoch": 0.9079469643117326, "grad_norm": 1.7252440452575684, "learning_rate": 2.2056506817461465e-07, "loss": 0.7542, "step": 25705 }, { "epoch": 0.9079822861154405, "grad_norm": 1.716575264930725, "learning_rate": 2.2039708030571816e-07, "loss": 0.7515, "step": 25706 }, { "epoch": 0.9080176079191484, "grad_norm": 1.7176975011825562, "learning_rate": 2.20229154991366e-07, "loss": 0.7548, "step": 25707 }, { "epoch": 0.9080529297228563, "grad_norm": 1.9450074434280396, "learning_rate": 2.2006129223375584e-07, "loss": 0.7914, "step": 25708 }, { "epoch": 0.9080882515265642, "grad_norm": 1.6333556175231934, "learning_rate": 2.198934920350859e-07, "loss": 0.7753, "step": 25709 }, { "epoch": 0.9081235733302722, "grad_norm": 1.7127246856689453, "learning_rate": 2.1972575439755062e-07, "loss": 0.7339, "step": 25710 }, { "epoch": 0.9081588951339801, "grad_norm": 1.622460126876831, "learning_rate": 2.1955807932334594e-07, "loss": 0.7137, "step": 25711 }, { "epoch": 0.9081942169376879, "grad_norm": 1.6334606409072876, "learning_rate": 2.1939046681466735e-07, "loss": 0.7606, "step": 25712 }, { "epoch": 0.9082295387413958, "grad_norm": 1.6794427633285522, "learning_rate": 2.19222916873707e-07, "loss": 0.7563, "step": 25713 }, { "epoch": 0.9082648605451037, "grad_norm": 1.7409478425979614, "learning_rate": 2.1905542950265812e-07, "loss": 0.7483, "step": 25714 }, { "epoch": 0.9083001823488116, "grad_norm": 1.7938404083251953, "learning_rate": 2.1888800470371398e-07, "loss": 0.7958, "step": 25715 }, { "epoch": 0.9083355041525195, "grad_norm": 2.326899290084839, "learning_rate": 2.1872064247906443e-07, "loss": 0.77, "step": 25716 }, { "epoch": 0.9083708259562274, "grad_norm": 1.590952754020691, "learning_rate": 2.1855334283090058e-07, "loss": 0.7688, "step": 25717 }, { "epoch": 0.9084061477599353, "grad_norm": 1.591340184211731, "learning_rate": 2.1838610576141117e-07, "loss": 0.7652, "step": 25718 }, { "epoch": 0.9084414695636432, "grad_norm": 1.671966314315796, "learning_rate": 2.1821893127278616e-07, "loss": 0.7583, "step": 25719 }, { "epoch": 0.9084767913673512, "grad_norm": 1.8029366731643677, "learning_rate": 2.1805181936721264e-07, "loss": 0.7628, "step": 25720 }, { "epoch": 0.9085121131710591, "grad_norm": 2.5005030632019043, "learning_rate": 2.178847700468778e-07, "loss": 0.7711, "step": 25721 }, { "epoch": 0.908547434974767, "grad_norm": 1.5791531801223755, "learning_rate": 2.1771778331396875e-07, "loss": 0.735, "step": 25722 }, { "epoch": 0.9085827567784749, "grad_norm": 1.6472344398498535, "learning_rate": 2.1755085917066986e-07, "loss": 0.7668, "step": 25723 }, { "epoch": 0.9086180785821828, "grad_norm": 1.5709741115570068, "learning_rate": 2.1738399761916607e-07, "loss": 0.7505, "step": 25724 }, { "epoch": 0.9086534003858907, "grad_norm": 1.7810280323028564, "learning_rate": 2.1721719866164225e-07, "loss": 0.7444, "step": 25725 }, { "epoch": 0.9086887221895986, "grad_norm": 1.6999510526657104, "learning_rate": 2.1705046230028005e-07, "loss": 0.7904, "step": 25726 }, { "epoch": 0.9087240439933065, "grad_norm": 1.594256043434143, "learning_rate": 2.1688378853726267e-07, "loss": 0.7771, "step": 25727 }, { "epoch": 0.9087593657970144, "grad_norm": 1.9467073678970337, "learning_rate": 2.1671717737477116e-07, "loss": 0.7653, "step": 25728 }, { "epoch": 0.9087946876007224, "grad_norm": 1.6221086978912354, "learning_rate": 2.1655062881498545e-07, "loss": 0.7561, "step": 25729 }, { "epoch": 0.9088300094044303, "grad_norm": 1.6253578662872314, "learning_rate": 2.1638414286008657e-07, "loss": 0.7722, "step": 25730 }, { "epoch": 0.9088653312081382, "grad_norm": 1.8193777799606323, "learning_rate": 2.1621771951225278e-07, "loss": 0.7936, "step": 25731 }, { "epoch": 0.9089006530118461, "grad_norm": 1.8225799798965454, "learning_rate": 2.160513587736618e-07, "loss": 0.7657, "step": 25732 }, { "epoch": 0.908935974815554, "grad_norm": 1.8310905694961548, "learning_rate": 2.158850606464913e-07, "loss": 0.7817, "step": 25733 }, { "epoch": 0.9089712966192619, "grad_norm": 1.6310683488845825, "learning_rate": 2.1571882513291898e-07, "loss": 0.7702, "step": 25734 }, { "epoch": 0.9090066184229698, "grad_norm": 1.6681935787200928, "learning_rate": 2.1555265223511867e-07, "loss": 0.8072, "step": 25735 }, { "epoch": 0.9090419402266777, "grad_norm": 1.7584176063537598, "learning_rate": 2.153865419552653e-07, "loss": 0.7468, "step": 25736 }, { "epoch": 0.9090772620303856, "grad_norm": 1.6728780269622803, "learning_rate": 2.1522049429553383e-07, "loss": 0.7767, "step": 25737 }, { "epoch": 0.9091125838340934, "grad_norm": 1.6749998331069946, "learning_rate": 2.1505450925809746e-07, "loss": 0.759, "step": 25738 }, { "epoch": 0.9091479056378013, "grad_norm": 1.578384280204773, "learning_rate": 2.148885868451278e-07, "loss": 0.7569, "step": 25739 }, { "epoch": 0.9091832274415093, "grad_norm": 1.5998966693878174, "learning_rate": 2.147227270587965e-07, "loss": 0.7175, "step": 25740 }, { "epoch": 0.9092185492452172, "grad_norm": 1.8452999591827393, "learning_rate": 2.1455692990127563e-07, "loss": 0.7688, "step": 25741 }, { "epoch": 0.9092538710489251, "grad_norm": 1.6795841455459595, "learning_rate": 2.1439119537473296e-07, "loss": 0.7688, "step": 25742 }, { "epoch": 0.909289192852633, "grad_norm": 3.1895060539245605, "learning_rate": 2.1422552348133896e-07, "loss": 0.7714, "step": 25743 }, { "epoch": 0.9093245146563409, "grad_norm": 1.8493250608444214, "learning_rate": 2.1405991422326243e-07, "loss": 0.8018, "step": 25744 }, { "epoch": 0.9093598364600488, "grad_norm": 1.5853406190872192, "learning_rate": 2.1389436760266945e-07, "loss": 0.7615, "step": 25745 }, { "epoch": 0.9093951582637567, "grad_norm": 1.9388153553009033, "learning_rate": 2.1372888362172718e-07, "loss": 0.7766, "step": 25746 }, { "epoch": 0.9094304800674646, "grad_norm": 1.773469090461731, "learning_rate": 2.1356346228260217e-07, "loss": 0.75, "step": 25747 }, { "epoch": 0.9094658018711725, "grad_norm": 1.7514492273330688, "learning_rate": 2.1339810358745826e-07, "loss": 0.7621, "step": 25748 }, { "epoch": 0.9095011236748805, "grad_norm": 1.571936011314392, "learning_rate": 2.1323280753845988e-07, "loss": 0.7452, "step": 25749 }, { "epoch": 0.9095364454785884, "grad_norm": 1.9399064779281616, "learning_rate": 2.130675741377719e-07, "loss": 0.7763, "step": 25750 }, { "epoch": 0.9095717672822963, "grad_norm": 2.0622189044952393, "learning_rate": 2.1290240338755486e-07, "loss": 0.7824, "step": 25751 }, { "epoch": 0.9096070890860042, "grad_norm": 1.671090006828308, "learning_rate": 2.127372952899709e-07, "loss": 0.7552, "step": 25752 }, { "epoch": 0.9096424108897121, "grad_norm": 2.417799949645996, "learning_rate": 2.125722498471816e-07, "loss": 0.7389, "step": 25753 }, { "epoch": 0.90967773269342, "grad_norm": 2.1007936000823975, "learning_rate": 2.1240726706134807e-07, "loss": 0.7475, "step": 25754 }, { "epoch": 0.9097130544971279, "grad_norm": 1.6350243091583252, "learning_rate": 2.1224234693462686e-07, "loss": 0.7649, "step": 25755 }, { "epoch": 0.9097483763008358, "grad_norm": 1.8781185150146484, "learning_rate": 2.1207748946917796e-07, "loss": 0.7836, "step": 25756 }, { "epoch": 0.9097836981045437, "grad_norm": 1.6023017168045044, "learning_rate": 2.1191269466715957e-07, "loss": 0.7793, "step": 25757 }, { "epoch": 0.9098190199082516, "grad_norm": 1.532747745513916, "learning_rate": 2.1174796253072672e-07, "loss": 0.7825, "step": 25758 }, { "epoch": 0.9098543417119596, "grad_norm": 15.160774230957031, "learning_rate": 2.115832930620365e-07, "loss": 0.7905, "step": 25759 }, { "epoch": 0.9098896635156675, "grad_norm": 2.1724042892456055, "learning_rate": 2.1141868626324502e-07, "loss": 0.7446, "step": 25760 }, { "epoch": 0.9099249853193754, "grad_norm": 1.746705174446106, "learning_rate": 2.1125414213650498e-07, "loss": 0.7358, "step": 25761 }, { "epoch": 0.9099603071230833, "grad_norm": 2.269561529159546, "learning_rate": 2.110896606839702e-07, "loss": 0.7684, "step": 25762 }, { "epoch": 0.9099956289267912, "grad_norm": 1.628495216369629, "learning_rate": 2.1092524190779395e-07, "loss": 0.7586, "step": 25763 }, { "epoch": 0.910030950730499, "grad_norm": 1.7215204238891602, "learning_rate": 2.1076088581012843e-07, "loss": 0.7856, "step": 25764 }, { "epoch": 0.9100662725342069, "grad_norm": 1.7698147296905518, "learning_rate": 2.1059659239312357e-07, "loss": 0.7334, "step": 25765 }, { "epoch": 0.9101015943379148, "grad_norm": 1.689619779586792, "learning_rate": 2.1043236165893044e-07, "loss": 0.75, "step": 25766 }, { "epoch": 0.9101369161416227, "grad_norm": 2.189795970916748, "learning_rate": 2.1026819360969842e-07, "loss": 0.7526, "step": 25767 }, { "epoch": 0.9101722379453306, "grad_norm": 1.9257458448410034, "learning_rate": 2.1010408824757522e-07, "loss": 0.7799, "step": 25768 }, { "epoch": 0.9102075597490386, "grad_norm": 1.9727524518966675, "learning_rate": 2.099400455747097e-07, "loss": 0.7655, "step": 25769 }, { "epoch": 0.9102428815527465, "grad_norm": 1.642429232597351, "learning_rate": 2.0977606559324902e-07, "loss": 0.7731, "step": 25770 }, { "epoch": 0.9102782033564544, "grad_norm": 8.429696083068848, "learning_rate": 2.0961214830533816e-07, "loss": 0.739, "step": 25771 }, { "epoch": 0.9103135251601623, "grad_norm": 1.8092899322509766, "learning_rate": 2.0944829371312314e-07, "loss": 0.7521, "step": 25772 }, { "epoch": 0.9103488469638702, "grad_norm": 1.7902120351791382, "learning_rate": 2.0928450181874837e-07, "loss": 0.7685, "step": 25773 }, { "epoch": 0.9103841687675781, "grad_norm": 1.8178229331970215, "learning_rate": 2.091207726243577e-07, "loss": 0.7291, "step": 25774 }, { "epoch": 0.910419490571286, "grad_norm": 1.6703603267669678, "learning_rate": 2.089571061320933e-07, "loss": 0.7435, "step": 25775 }, { "epoch": 0.9104548123749939, "grad_norm": 1.8515011072158813, "learning_rate": 2.0879350234409734e-07, "loss": 0.7623, "step": 25776 }, { "epoch": 0.9104901341787018, "grad_norm": 1.7824816703796387, "learning_rate": 2.0862996126251256e-07, "loss": 0.7584, "step": 25777 }, { "epoch": 0.9105254559824097, "grad_norm": 1.8730038404464722, "learning_rate": 2.0846648288947725e-07, "loss": 0.7944, "step": 25778 }, { "epoch": 0.9105607777861177, "grad_norm": 1.7490620613098145, "learning_rate": 2.0830306722713134e-07, "loss": 0.7765, "step": 25779 }, { "epoch": 0.9105960995898256, "grad_norm": 1.6247539520263672, "learning_rate": 2.0813971427761537e-07, "loss": 0.7586, "step": 25780 }, { "epoch": 0.9106314213935335, "grad_norm": 1.6964212656021118, "learning_rate": 2.079764240430654e-07, "loss": 0.763, "step": 25781 }, { "epoch": 0.9106667431972414, "grad_norm": 1.7220001220703125, "learning_rate": 2.0781319652561915e-07, "loss": 0.7932, "step": 25782 }, { "epoch": 0.9107020650009493, "grad_norm": 1.6337963342666626, "learning_rate": 2.0765003172741383e-07, "loss": 0.7721, "step": 25783 }, { "epoch": 0.9107373868046572, "grad_norm": 1.7621781826019287, "learning_rate": 2.0748692965058326e-07, "loss": 0.7426, "step": 25784 }, { "epoch": 0.9107727086083651, "grad_norm": 1.722874641418457, "learning_rate": 2.073238902972624e-07, "loss": 0.7407, "step": 25785 }, { "epoch": 0.910808030412073, "grad_norm": 1.7340162992477417, "learning_rate": 2.0716091366958678e-07, "loss": 0.7907, "step": 25786 }, { "epoch": 0.9108433522157809, "grad_norm": 1.79624342918396, "learning_rate": 2.069979997696875e-07, "loss": 0.7589, "step": 25787 }, { "epoch": 0.9108786740194889, "grad_norm": 1.7933299541473389, "learning_rate": 2.0683514859969723e-07, "loss": 0.7581, "step": 25788 }, { "epoch": 0.9109139958231968, "grad_norm": 1.9096201658248901, "learning_rate": 2.0667236016174763e-07, "loss": 0.7549, "step": 25789 }, { "epoch": 0.9109493176269046, "grad_norm": 1.7379871606826782, "learning_rate": 2.0650963445796923e-07, "loss": 0.7503, "step": 25790 }, { "epoch": 0.9109846394306125, "grad_norm": 1.8447049856185913, "learning_rate": 2.06346971490492e-07, "loss": 0.7689, "step": 25791 }, { "epoch": 0.9110199612343204, "grad_norm": 1.7403366565704346, "learning_rate": 2.061843712614442e-07, "loss": 0.7488, "step": 25792 }, { "epoch": 0.9110552830380283, "grad_norm": 1.7122076749801636, "learning_rate": 2.0602183377295414e-07, "loss": 0.7913, "step": 25793 }, { "epoch": 0.9110906048417362, "grad_norm": 1.8932820558547974, "learning_rate": 2.0585935902714904e-07, "loss": 0.79, "step": 25794 }, { "epoch": 0.9111259266454441, "grad_norm": 1.6911965608596802, "learning_rate": 2.056969470261555e-07, "loss": 0.7712, "step": 25795 }, { "epoch": 0.911161248449152, "grad_norm": 2.0404915809631348, "learning_rate": 2.055345977720996e-07, "loss": 0.7395, "step": 25796 }, { "epoch": 0.9111965702528599, "grad_norm": 2.108074426651001, "learning_rate": 2.0537231126710465e-07, "loss": 0.7339, "step": 25797 }, { "epoch": 0.9112318920565678, "grad_norm": 1.0034469366073608, "learning_rate": 2.052100875132962e-07, "loss": 0.6008, "step": 25798 }, { "epoch": 0.9112672138602758, "grad_norm": 1.012819528579712, "learning_rate": 2.0504792651279692e-07, "loss": 0.6045, "step": 25799 }, { "epoch": 0.9113025356639837, "grad_norm": 1.7585501670837402, "learning_rate": 2.0488582826772852e-07, "loss": 0.8087, "step": 25800 }, { "epoch": 0.9113378574676916, "grad_norm": 1.6238932609558105, "learning_rate": 2.0472379278021315e-07, "loss": 0.737, "step": 25801 }, { "epoch": 0.9113731792713995, "grad_norm": 1.5871092081069946, "learning_rate": 2.045618200523719e-07, "loss": 0.731, "step": 25802 }, { "epoch": 0.9114085010751074, "grad_norm": 1.8793631792068481, "learning_rate": 2.0439991008632364e-07, "loss": 0.764, "step": 25803 }, { "epoch": 0.9114438228788153, "grad_norm": 1.6268337965011597, "learning_rate": 2.042380628841878e-07, "loss": 0.7338, "step": 25804 }, { "epoch": 0.9114791446825232, "grad_norm": 0.8916484713554382, "learning_rate": 2.040762784480832e-07, "loss": 0.5744, "step": 25805 }, { "epoch": 0.9115144664862311, "grad_norm": 1.6618844270706177, "learning_rate": 2.0391455678012652e-07, "loss": 0.7964, "step": 25806 }, { "epoch": 0.911549788289939, "grad_norm": 1.5836548805236816, "learning_rate": 2.0375289788243435e-07, "loss": 0.7495, "step": 25807 }, { "epoch": 0.911585110093647, "grad_norm": 1.5780084133148193, "learning_rate": 2.0359130175712283e-07, "loss": 0.7463, "step": 25808 }, { "epoch": 0.9116204318973549, "grad_norm": 1.601452350616455, "learning_rate": 2.0342976840630746e-07, "loss": 0.7317, "step": 25809 }, { "epoch": 0.9116557537010628, "grad_norm": 1.72624671459198, "learning_rate": 2.0326829783210045e-07, "loss": 0.7619, "step": 25810 }, { "epoch": 0.9116910755047707, "grad_norm": 2.2524502277374268, "learning_rate": 2.0310689003661622e-07, "loss": 0.7586, "step": 25811 }, { "epoch": 0.9117263973084786, "grad_norm": 1.781012773513794, "learning_rate": 2.0294554502196805e-07, "loss": 0.7902, "step": 25812 }, { "epoch": 0.9117617191121865, "grad_norm": 1.638107180595398, "learning_rate": 2.0278426279026654e-07, "loss": 0.7535, "step": 25813 }, { "epoch": 0.9117970409158944, "grad_norm": 1.8696895837783813, "learning_rate": 2.026230433436227e-07, "loss": 0.7634, "step": 25814 }, { "epoch": 0.9118323627196023, "grad_norm": 1.8101624250411987, "learning_rate": 2.0246188668414712e-07, "loss": 0.762, "step": 25815 }, { "epoch": 0.9118676845233101, "grad_norm": 1.6041126251220703, "learning_rate": 2.0230079281394754e-07, "loss": 0.7589, "step": 25816 }, { "epoch": 0.911903006327018, "grad_norm": 1.6873447895050049, "learning_rate": 2.0213976173513395e-07, "loss": 0.7725, "step": 25817 }, { "epoch": 0.911938328130726, "grad_norm": 1.8175222873687744, "learning_rate": 2.019787934498135e-07, "loss": 0.7273, "step": 25818 }, { "epoch": 0.9119736499344339, "grad_norm": 1.5624313354492188, "learning_rate": 2.018178879600924e-07, "loss": 0.7287, "step": 25819 }, { "epoch": 0.9120089717381418, "grad_norm": 1.7078850269317627, "learning_rate": 2.016570452680766e-07, "loss": 0.739, "step": 25820 }, { "epoch": 0.9120442935418497, "grad_norm": 1.5400316715240479, "learning_rate": 2.014962653758712e-07, "loss": 0.7477, "step": 25821 }, { "epoch": 0.9120796153455576, "grad_norm": 1.6019814014434814, "learning_rate": 2.0133554828558166e-07, "loss": 0.7569, "step": 25822 }, { "epoch": 0.9121149371492655, "grad_norm": 1.702419638633728, "learning_rate": 2.0117489399930968e-07, "loss": 0.7728, "step": 25823 }, { "epoch": 0.9121502589529734, "grad_norm": 1.8735263347625732, "learning_rate": 2.0101430251915855e-07, "loss": 0.7636, "step": 25824 }, { "epoch": 0.9121855807566813, "grad_norm": 1.7067296504974365, "learning_rate": 2.008537738472305e-07, "loss": 0.7721, "step": 25825 }, { "epoch": 0.9122209025603892, "grad_norm": 1.5511631965637207, "learning_rate": 2.0069330798562602e-07, "loss": 0.7584, "step": 25826 }, { "epoch": 0.9122562243640971, "grad_norm": 1.6724542379379272, "learning_rate": 2.0053290493644517e-07, "loss": 0.7604, "step": 25827 }, { "epoch": 0.9122915461678051, "grad_norm": 1.820029377937317, "learning_rate": 2.0037256470178845e-07, "loss": 0.7711, "step": 25828 }, { "epoch": 0.912326867971513, "grad_norm": 1.689259648323059, "learning_rate": 2.0021228728375252e-07, "loss": 0.827, "step": 25829 }, { "epoch": 0.9123621897752209, "grad_norm": 3.4128193855285645, "learning_rate": 2.0005207268443573e-07, "loss": 0.7519, "step": 25830 }, { "epoch": 0.9123975115789288, "grad_norm": 2.0174601078033447, "learning_rate": 1.9989192090593467e-07, "loss": 0.7605, "step": 25831 }, { "epoch": 0.9124328333826367, "grad_norm": 1.6369178295135498, "learning_rate": 1.9973183195034717e-07, "loss": 0.7518, "step": 25832 }, { "epoch": 0.9124681551863446, "grad_norm": 1.7078857421875, "learning_rate": 1.99571805819766e-07, "loss": 0.7555, "step": 25833 }, { "epoch": 0.9125034769900525, "grad_norm": 1.6982237100601196, "learning_rate": 1.9941184251628664e-07, "loss": 0.7694, "step": 25834 }, { "epoch": 0.9125387987937604, "grad_norm": 1.6241809129714966, "learning_rate": 1.992519420420036e-07, "loss": 0.7604, "step": 25835 }, { "epoch": 0.9125741205974683, "grad_norm": 1.7435880899429321, "learning_rate": 1.9909210439900795e-07, "loss": 0.7549, "step": 25836 }, { "epoch": 0.9126094424011763, "grad_norm": 1.8669447898864746, "learning_rate": 1.9893232958939247e-07, "loss": 0.7473, "step": 25837 }, { "epoch": 0.9126447642048842, "grad_norm": 1.8785715103149414, "learning_rate": 1.987726176152488e-07, "loss": 0.7702, "step": 25838 }, { "epoch": 0.9126800860085921, "grad_norm": 1.9224729537963867, "learning_rate": 1.9861296847866586e-07, "loss": 0.7735, "step": 25839 }, { "epoch": 0.9127154078123, "grad_norm": 1.7200933694839478, "learning_rate": 1.984533821817336e-07, "loss": 0.7264, "step": 25840 }, { "epoch": 0.9127507296160079, "grad_norm": 1.6341859102249146, "learning_rate": 1.982938587265415e-07, "loss": 0.7629, "step": 25841 }, { "epoch": 0.9127860514197157, "grad_norm": 1.6376323699951172, "learning_rate": 1.9813439811517677e-07, "loss": 0.7644, "step": 25842 }, { "epoch": 0.9128213732234236, "grad_norm": 1.667453646659851, "learning_rate": 1.979750003497255e-07, "loss": 0.7736, "step": 25843 }, { "epoch": 0.9128566950271315, "grad_norm": 1.7841322422027588, "learning_rate": 1.9781566543227604e-07, "loss": 0.7678, "step": 25844 }, { "epoch": 0.9128920168308394, "grad_norm": 1.7319889068603516, "learning_rate": 1.9765639336491172e-07, "loss": 0.7728, "step": 25845 }, { "epoch": 0.9129273386345473, "grad_norm": 1.6378185749053955, "learning_rate": 1.9749718414971807e-07, "loss": 0.7866, "step": 25846 }, { "epoch": 0.9129626604382552, "grad_norm": 1.774039387702942, "learning_rate": 1.9733803778877846e-07, "loss": 0.7768, "step": 25847 }, { "epoch": 0.9129979822419632, "grad_norm": 2.102033853530884, "learning_rate": 1.9717895428417565e-07, "loss": 0.7758, "step": 25848 }, { "epoch": 0.9130333040456711, "grad_norm": 1.7185014486312866, "learning_rate": 1.9701993363799188e-07, "loss": 0.7689, "step": 25849 }, { "epoch": 0.913068625849379, "grad_norm": 0.8543288111686707, "learning_rate": 1.9686097585230824e-07, "loss": 0.551, "step": 25850 }, { "epoch": 0.9131039476530869, "grad_norm": 1.8762047290802002, "learning_rate": 1.9670208092920585e-07, "loss": 0.7734, "step": 25851 }, { "epoch": 0.9131392694567948, "grad_norm": 1.631714105606079, "learning_rate": 1.9654324887076248e-07, "loss": 0.7724, "step": 25852 }, { "epoch": 0.9131745912605027, "grad_norm": 1.9065437316894531, "learning_rate": 1.9638447967905873e-07, "loss": 0.7597, "step": 25853 }, { "epoch": 0.9132099130642106, "grad_norm": 1.769368290901184, "learning_rate": 1.9622577335617232e-07, "loss": 0.7517, "step": 25854 }, { "epoch": 0.9132452348679185, "grad_norm": 2.051149368286133, "learning_rate": 1.9606712990417998e-07, "loss": 0.7613, "step": 25855 }, { "epoch": 0.9132805566716264, "grad_norm": 2.0191454887390137, "learning_rate": 1.9590854932515725e-07, "loss": 0.7664, "step": 25856 }, { "epoch": 0.9133158784753344, "grad_norm": 1.570398211479187, "learning_rate": 1.9575003162118135e-07, "loss": 0.7209, "step": 25857 }, { "epoch": 0.9133512002790423, "grad_norm": 1.7149947881698608, "learning_rate": 1.9559157679432504e-07, "loss": 0.7953, "step": 25858 }, { "epoch": 0.9133865220827502, "grad_norm": 1.590928316116333, "learning_rate": 1.954331848466634e-07, "loss": 0.7877, "step": 25859 }, { "epoch": 0.9134218438864581, "grad_norm": 1.9836483001708984, "learning_rate": 1.9527485578026916e-07, "loss": 0.7646, "step": 25860 }, { "epoch": 0.913457165690166, "grad_norm": 1.6741193532943726, "learning_rate": 1.9511658959721459e-07, "loss": 0.8027, "step": 25861 }, { "epoch": 0.9134924874938739, "grad_norm": 1.682041049003601, "learning_rate": 1.949583862995702e-07, "loss": 0.7832, "step": 25862 }, { "epoch": 0.9135278092975818, "grad_norm": 1.648344874382019, "learning_rate": 1.9480024588940827e-07, "loss": 0.7665, "step": 25863 }, { "epoch": 0.9135631311012897, "grad_norm": 1.7634291648864746, "learning_rate": 1.9464216836879656e-07, "loss": 0.7749, "step": 25864 }, { "epoch": 0.9135984529049976, "grad_norm": 2.2957122325897217, "learning_rate": 1.9448415373980512e-07, "loss": 0.7941, "step": 25865 }, { "epoch": 0.9136337747087055, "grad_norm": 2.0509588718414307, "learning_rate": 1.943262020045017e-07, "loss": 0.7656, "step": 25866 }, { "epoch": 0.9136690965124135, "grad_norm": 4.86212158203125, "learning_rate": 1.941683131649541e-07, "loss": 0.7533, "step": 25867 }, { "epoch": 0.9137044183161213, "grad_norm": 1.5243757963180542, "learning_rate": 1.9401048722322735e-07, "loss": 0.7324, "step": 25868 }, { "epoch": 0.9137397401198292, "grad_norm": 1.6238049268722534, "learning_rate": 1.9385272418138812e-07, "loss": 0.7588, "step": 25869 }, { "epoch": 0.9137750619235371, "grad_norm": 1.9534199237823486, "learning_rate": 1.9369502404150143e-07, "loss": 0.7919, "step": 25870 }, { "epoch": 0.913810383727245, "grad_norm": 1.7295339107513428, "learning_rate": 1.935373868056306e-07, "loss": 0.7758, "step": 25871 }, { "epoch": 0.9138457055309529, "grad_norm": 0.9253553748130798, "learning_rate": 1.9337981247583847e-07, "loss": 0.5565, "step": 25872 }, { "epoch": 0.9138810273346608, "grad_norm": 1.7569838762283325, "learning_rate": 1.9322230105418837e-07, "loss": 0.7522, "step": 25873 }, { "epoch": 0.9139163491383687, "grad_norm": 1.555863618850708, "learning_rate": 1.9306485254274087e-07, "loss": 0.7823, "step": 25874 }, { "epoch": 0.9139516709420766, "grad_norm": 1.6177688837051392, "learning_rate": 1.929074669435571e-07, "loss": 0.774, "step": 25875 }, { "epoch": 0.9139869927457845, "grad_norm": 1.7336925268173218, "learning_rate": 1.927501442586971e-07, "loss": 0.7594, "step": 25876 }, { "epoch": 0.9140223145494925, "grad_norm": 1.9106078147888184, "learning_rate": 1.9259288449021917e-07, "loss": 0.768, "step": 25877 }, { "epoch": 0.9140576363532004, "grad_norm": 1.754892349243164, "learning_rate": 1.9243568764018172e-07, "loss": 0.771, "step": 25878 }, { "epoch": 0.9140929581569083, "grad_norm": 1.8965532779693604, "learning_rate": 1.9227855371064197e-07, "loss": 0.7716, "step": 25879 }, { "epoch": 0.9141282799606162, "grad_norm": 1.5990344285964966, "learning_rate": 1.9212148270365772e-07, "loss": 0.7834, "step": 25880 }, { "epoch": 0.9141636017643241, "grad_norm": 1.584351658821106, "learning_rate": 1.9196447462128287e-07, "loss": 0.7947, "step": 25881 }, { "epoch": 0.914198923568032, "grad_norm": 1.8035352230072021, "learning_rate": 1.9180752946557357e-07, "loss": 0.7904, "step": 25882 }, { "epoch": 0.9142342453717399, "grad_norm": 2.2814722061157227, "learning_rate": 1.916506472385843e-07, "loss": 0.7411, "step": 25883 }, { "epoch": 0.9142695671754478, "grad_norm": 1.8269256353378296, "learning_rate": 1.914938279423667e-07, "loss": 0.7803, "step": 25884 }, { "epoch": 0.9143048889791557, "grad_norm": 1.6908255815505981, "learning_rate": 1.9133707157897363e-07, "loss": 0.7448, "step": 25885 }, { "epoch": 0.9143402107828636, "grad_norm": 1.5622304677963257, "learning_rate": 1.9118037815045787e-07, "loss": 0.7376, "step": 25886 }, { "epoch": 0.9143755325865716, "grad_norm": 1.742559552192688, "learning_rate": 1.9102374765886832e-07, "loss": 0.7713, "step": 25887 }, { "epoch": 0.9144108543902795, "grad_norm": 1.571401834487915, "learning_rate": 1.908671801062567e-07, "loss": 0.7454, "step": 25888 }, { "epoch": 0.9144461761939874, "grad_norm": 1.6332778930664062, "learning_rate": 1.9071067549467082e-07, "loss": 0.7127, "step": 25889 }, { "epoch": 0.9144814979976953, "grad_norm": 1.6058523654937744, "learning_rate": 1.9055423382616012e-07, "loss": 0.7237, "step": 25890 }, { "epoch": 0.9145168198014032, "grad_norm": 1.7850600481033325, "learning_rate": 1.9039785510277131e-07, "loss": 0.7739, "step": 25891 }, { "epoch": 0.9145521416051111, "grad_norm": 1.760376214981079, "learning_rate": 1.902415393265511e-07, "loss": 0.7645, "step": 25892 }, { "epoch": 0.914587463408819, "grad_norm": 1.6981775760650635, "learning_rate": 1.9008528649954615e-07, "loss": 0.7697, "step": 25893 }, { "epoch": 0.9146227852125268, "grad_norm": 1.6318625211715698, "learning_rate": 1.8992909662380044e-07, "loss": 0.7315, "step": 25894 }, { "epoch": 0.9146581070162347, "grad_norm": 0.9513347744941711, "learning_rate": 1.8977296970135785e-07, "loss": 0.5556, "step": 25895 }, { "epoch": 0.9146934288199426, "grad_norm": 2.5757923126220703, "learning_rate": 1.896169057342634e-07, "loss": 0.7452, "step": 25896 }, { "epoch": 0.9147287506236506, "grad_norm": 1.5314197540283203, "learning_rate": 1.8946090472455826e-07, "loss": 0.7305, "step": 25897 }, { "epoch": 0.9147640724273585, "grad_norm": 1.679833173751831, "learning_rate": 1.8930496667428412e-07, "loss": 0.7905, "step": 25898 }, { "epoch": 0.9147993942310664, "grad_norm": 1.7033125162124634, "learning_rate": 1.8914909158548324e-07, "loss": 0.7507, "step": 25899 }, { "epoch": 0.9148347160347743, "grad_norm": 1.7044891119003296, "learning_rate": 1.8899327946019396e-07, "loss": 0.7611, "step": 25900 }, { "epoch": 0.9148700378384822, "grad_norm": 1.7202214002609253, "learning_rate": 1.8883753030045582e-07, "loss": 0.7607, "step": 25901 }, { "epoch": 0.9149053596421901, "grad_norm": 1.6620842218399048, "learning_rate": 1.8868184410830882e-07, "loss": 0.7544, "step": 25902 }, { "epoch": 0.914940681445898, "grad_norm": 1.6041576862335205, "learning_rate": 1.8852622088578908e-07, "loss": 0.7979, "step": 25903 }, { "epoch": 0.9149760032496059, "grad_norm": 1.5912889242172241, "learning_rate": 1.8837066063493335e-07, "loss": 0.7622, "step": 25904 }, { "epoch": 0.9150113250533138, "grad_norm": 1.6058268547058105, "learning_rate": 1.8821516335777777e-07, "loss": 0.7481, "step": 25905 }, { "epoch": 0.9150466468570218, "grad_norm": 1.693848729133606, "learning_rate": 1.880597290563585e-07, "loss": 0.7969, "step": 25906 }, { "epoch": 0.9150819686607297, "grad_norm": 1.6474772691726685, "learning_rate": 1.879043577327083e-07, "loss": 0.7506, "step": 25907 }, { "epoch": 0.9151172904644376, "grad_norm": 1.7492282390594482, "learning_rate": 1.877490493888612e-07, "loss": 0.7641, "step": 25908 }, { "epoch": 0.9151526122681455, "grad_norm": 1.7080358266830444, "learning_rate": 1.8759380402685045e-07, "loss": 0.7803, "step": 25909 }, { "epoch": 0.9151879340718534, "grad_norm": 1.7114272117614746, "learning_rate": 1.8743862164870674e-07, "loss": 0.7863, "step": 25910 }, { "epoch": 0.9152232558755613, "grad_norm": 1.6399617195129395, "learning_rate": 1.8728350225646174e-07, "loss": 0.7627, "step": 25911 }, { "epoch": 0.9152585776792692, "grad_norm": 1.6679402589797974, "learning_rate": 1.8712844585214607e-07, "loss": 0.7371, "step": 25912 }, { "epoch": 0.9152938994829771, "grad_norm": 1.781965970993042, "learning_rate": 1.869734524377881e-07, "loss": 0.7554, "step": 25913 }, { "epoch": 0.915329221286685, "grad_norm": 1.5207690000534058, "learning_rate": 1.8681852201541673e-07, "loss": 0.7455, "step": 25914 }, { "epoch": 0.915364543090393, "grad_norm": 1.6794641017913818, "learning_rate": 1.8666365458706038e-07, "loss": 0.7781, "step": 25915 }, { "epoch": 0.9153998648941009, "grad_norm": 1.7769453525543213, "learning_rate": 1.8650885015474517e-07, "loss": 0.7551, "step": 25916 }, { "epoch": 0.9154351866978088, "grad_norm": 1.838169813156128, "learning_rate": 1.8635410872049676e-07, "loss": 0.7308, "step": 25917 }, { "epoch": 0.9154705085015167, "grad_norm": 1.7898423671722412, "learning_rate": 1.861994302863418e-07, "loss": 0.7371, "step": 25918 }, { "epoch": 0.9155058303052246, "grad_norm": 1.791183590888977, "learning_rate": 1.8604481485430315e-07, "loss": 0.7547, "step": 25919 }, { "epoch": 0.9155411521089324, "grad_norm": 1.702009677886963, "learning_rate": 1.8589026242640528e-07, "loss": 0.7876, "step": 25920 }, { "epoch": 0.9155764739126403, "grad_norm": 1.6423224210739136, "learning_rate": 1.857357730046705e-07, "loss": 0.7548, "step": 25921 }, { "epoch": 0.9156117957163482, "grad_norm": 1.658278226852417, "learning_rate": 1.8558134659112104e-07, "loss": 0.7556, "step": 25922 }, { "epoch": 0.9156471175200561, "grad_norm": 1.789681315422058, "learning_rate": 1.8542698318777807e-07, "loss": 0.7758, "step": 25923 }, { "epoch": 0.915682439323764, "grad_norm": 1.7394702434539795, "learning_rate": 1.852726827966611e-07, "loss": 0.794, "step": 25924 }, { "epoch": 0.9157177611274719, "grad_norm": 1.5979384183883667, "learning_rate": 1.851184454197913e-07, "loss": 0.7598, "step": 25925 }, { "epoch": 0.9157530829311799, "grad_norm": 1.7376919984817505, "learning_rate": 1.849642710591848e-07, "loss": 0.7902, "step": 25926 }, { "epoch": 0.9157884047348878, "grad_norm": 1.681442379951477, "learning_rate": 1.8481015971686166e-07, "loss": 0.7844, "step": 25927 }, { "epoch": 0.9158237265385957, "grad_norm": 2.0058507919311523, "learning_rate": 1.8465611139483807e-07, "loss": 0.7883, "step": 25928 }, { "epoch": 0.9158590483423036, "grad_norm": 1.760425329208374, "learning_rate": 1.8450212609512964e-07, "loss": 0.7863, "step": 25929 }, { "epoch": 0.9158943701460115, "grad_norm": 1.61194908618927, "learning_rate": 1.8434820381975195e-07, "loss": 0.775, "step": 25930 }, { "epoch": 0.9159296919497194, "grad_norm": 1.6423088312149048, "learning_rate": 1.841943445707206e-07, "loss": 0.7521, "step": 25931 }, { "epoch": 0.9159650137534273, "grad_norm": 1.5147589445114136, "learning_rate": 1.840405483500479e-07, "loss": 0.7705, "step": 25932 }, { "epoch": 0.9160003355571352, "grad_norm": 1.652353286743164, "learning_rate": 1.8388681515974727e-07, "loss": 0.7664, "step": 25933 }, { "epoch": 0.9160356573608431, "grad_norm": 1.6089979410171509, "learning_rate": 1.8373314500183093e-07, "loss": 0.7202, "step": 25934 }, { "epoch": 0.916070979164551, "grad_norm": 1.6687616109848022, "learning_rate": 1.835795378783095e-07, "loss": 0.7601, "step": 25935 }, { "epoch": 0.916106300968259, "grad_norm": 1.6918329000473022, "learning_rate": 1.8342599379119309e-07, "loss": 0.7805, "step": 25936 }, { "epoch": 0.9161416227719669, "grad_norm": 1.6718107461929321, "learning_rate": 1.8327251274249225e-07, "loss": 0.7593, "step": 25937 }, { "epoch": 0.9161769445756748, "grad_norm": 1.6687774658203125, "learning_rate": 1.8311909473421706e-07, "loss": 0.7517, "step": 25938 }, { "epoch": 0.9162122663793827, "grad_norm": 1.828954815864563, "learning_rate": 1.8296573976837152e-07, "loss": 0.7519, "step": 25939 }, { "epoch": 0.9162475881830906, "grad_norm": 1.814096450805664, "learning_rate": 1.8281244784696562e-07, "loss": 0.7669, "step": 25940 }, { "epoch": 0.9162829099867985, "grad_norm": 2.1416425704956055, "learning_rate": 1.82659218972005e-07, "loss": 0.7479, "step": 25941 }, { "epoch": 0.9163182317905064, "grad_norm": 1.604796290397644, "learning_rate": 1.8250605314549476e-07, "loss": 0.7807, "step": 25942 }, { "epoch": 0.9163535535942143, "grad_norm": 1.698765754699707, "learning_rate": 1.8235295036943934e-07, "loss": 0.7429, "step": 25943 }, { "epoch": 0.9163888753979222, "grad_norm": 1.7284214496612549, "learning_rate": 1.8219991064584385e-07, "loss": 0.7613, "step": 25944 }, { "epoch": 0.9164241972016302, "grad_norm": 1.6019458770751953, "learning_rate": 1.8204693397670947e-07, "loss": 0.7549, "step": 25945 }, { "epoch": 0.916459519005338, "grad_norm": 1.6427401304244995, "learning_rate": 1.8189402036403903e-07, "loss": 0.7708, "step": 25946 }, { "epoch": 0.9164948408090459, "grad_norm": 1.7010724544525146, "learning_rate": 1.8174116980983424e-07, "loss": 0.7466, "step": 25947 }, { "epoch": 0.9165301626127538, "grad_norm": 1.7481980323791504, "learning_rate": 1.8158838231609576e-07, "loss": 0.7527, "step": 25948 }, { "epoch": 0.9165654844164617, "grad_norm": 1.7036268711090088, "learning_rate": 1.8143565788482198e-07, "loss": 0.7761, "step": 25949 }, { "epoch": 0.9166008062201696, "grad_norm": 1.739802598953247, "learning_rate": 1.8128299651801296e-07, "loss": 0.7459, "step": 25950 }, { "epoch": 0.9166361280238775, "grad_norm": 1.6092637777328491, "learning_rate": 1.8113039821766653e-07, "loss": 0.7973, "step": 25951 }, { "epoch": 0.9166714498275854, "grad_norm": 2.9548442363739014, "learning_rate": 1.809778629857789e-07, "loss": 0.776, "step": 25952 }, { "epoch": 0.9167067716312933, "grad_norm": 1.8471410274505615, "learning_rate": 1.8082539082434736e-07, "loss": 0.8036, "step": 25953 }, { "epoch": 0.9167420934350012, "grad_norm": 1.6180708408355713, "learning_rate": 1.806729817353675e-07, "loss": 0.7652, "step": 25954 }, { "epoch": 0.9167774152387091, "grad_norm": 1.793609619140625, "learning_rate": 1.8052063572083333e-07, "loss": 0.7573, "step": 25955 }, { "epoch": 0.9168127370424171, "grad_norm": 1.7768946886062622, "learning_rate": 1.8036835278273933e-07, "loss": 0.7485, "step": 25956 }, { "epoch": 0.916848058846125, "grad_norm": 1.6351405382156372, "learning_rate": 1.8021613292307838e-07, "loss": 0.73, "step": 25957 }, { "epoch": 0.9168833806498329, "grad_norm": 1.7588077783584595, "learning_rate": 1.800639761438433e-07, "loss": 0.7871, "step": 25958 }, { "epoch": 0.9169187024535408, "grad_norm": 1.8663322925567627, "learning_rate": 1.7991188244702362e-07, "loss": 0.775, "step": 25959 }, { "epoch": 0.9169540242572487, "grad_norm": 1.7398887872695923, "learning_rate": 1.7975985183461164e-07, "loss": 0.7588, "step": 25960 }, { "epoch": 0.9169893460609566, "grad_norm": 1.8120146989822388, "learning_rate": 1.7960788430859687e-07, "loss": 0.7576, "step": 25961 }, { "epoch": 0.9170246678646645, "grad_norm": 1.8295918703079224, "learning_rate": 1.7945597987096773e-07, "loss": 0.7866, "step": 25962 }, { "epoch": 0.9170599896683724, "grad_norm": 1.7469849586486816, "learning_rate": 1.7930413852371264e-07, "loss": 0.7695, "step": 25963 }, { "epoch": 0.9170953114720803, "grad_norm": 0.9993714094161987, "learning_rate": 1.7915236026881887e-07, "loss": 0.5621, "step": 25964 }, { "epoch": 0.9171306332757883, "grad_norm": 2.2316219806671143, "learning_rate": 1.7900064510827263e-07, "loss": 0.7574, "step": 25965 }, { "epoch": 0.9171659550794962, "grad_norm": 1.720487117767334, "learning_rate": 1.7884899304405955e-07, "loss": 0.7863, "step": 25966 }, { "epoch": 0.9172012768832041, "grad_norm": 1.6811357736587524, "learning_rate": 1.7869740407816527e-07, "loss": 0.7493, "step": 25967 }, { "epoch": 0.917236598686912, "grad_norm": 1.6398917436599731, "learning_rate": 1.7854587821257207e-07, "loss": 0.7596, "step": 25968 }, { "epoch": 0.9172719204906199, "grad_norm": 1.6332505941390991, "learning_rate": 1.783944154492645e-07, "loss": 0.7409, "step": 25969 }, { "epoch": 0.9173072422943278, "grad_norm": 2.377408266067505, "learning_rate": 1.7824301579022485e-07, "loss": 0.7466, "step": 25970 }, { "epoch": 0.9173425640980357, "grad_norm": 1.648203730583191, "learning_rate": 1.7809167923743377e-07, "loss": 0.7818, "step": 25971 }, { "epoch": 0.9173778859017435, "grad_norm": 1.774719476699829, "learning_rate": 1.7794040579287186e-07, "loss": 0.7323, "step": 25972 }, { "epoch": 0.9174132077054514, "grad_norm": 1.6748565435409546, "learning_rate": 1.777891954585209e-07, "loss": 0.7894, "step": 25973 }, { "epoch": 0.9174485295091593, "grad_norm": 1.985693335533142, "learning_rate": 1.7763804823635711e-07, "loss": 0.7764, "step": 25974 }, { "epoch": 0.9174838513128672, "grad_norm": 1.9434250593185425, "learning_rate": 1.7748696412836107e-07, "loss": 0.7434, "step": 25975 }, { "epoch": 0.9175191731165752, "grad_norm": 1.8944814205169678, "learning_rate": 1.773359431365085e-07, "loss": 0.7541, "step": 25976 }, { "epoch": 0.9175544949202831, "grad_norm": 2.1625473499298096, "learning_rate": 1.7718498526277716e-07, "loss": 0.7414, "step": 25977 }, { "epoch": 0.917589816723991, "grad_norm": 1.8639336824417114, "learning_rate": 1.7703409050914166e-07, "loss": 0.7679, "step": 25978 }, { "epoch": 0.9176251385276989, "grad_norm": 1.7425141334533691, "learning_rate": 1.7688325887757708e-07, "loss": 0.7628, "step": 25979 }, { "epoch": 0.9176604603314068, "grad_norm": 2.3442347049713135, "learning_rate": 1.7673249037005845e-07, "loss": 0.7842, "step": 25980 }, { "epoch": 0.9176957821351147, "grad_norm": 1.7459028959274292, "learning_rate": 1.765817849885576e-07, "loss": 0.7816, "step": 25981 }, { "epoch": 0.9177311039388226, "grad_norm": 1.601558804512024, "learning_rate": 1.764311427350479e-07, "loss": 0.756, "step": 25982 }, { "epoch": 0.9177664257425305, "grad_norm": 1.861823558807373, "learning_rate": 1.7628056361150113e-07, "loss": 0.7236, "step": 25983 }, { "epoch": 0.9178017475462384, "grad_norm": 1.6449334621429443, "learning_rate": 1.7613004761988684e-07, "loss": 0.7168, "step": 25984 }, { "epoch": 0.9178370693499464, "grad_norm": 1.8378771543502808, "learning_rate": 1.7597959476217563e-07, "loss": 0.7276, "step": 25985 }, { "epoch": 0.9178723911536543, "grad_norm": 1.593327283859253, "learning_rate": 1.7582920504033762e-07, "loss": 0.7272, "step": 25986 }, { "epoch": 0.9179077129573622, "grad_norm": 1.9160728454589844, "learning_rate": 1.7567887845633958e-07, "loss": 0.7398, "step": 25987 }, { "epoch": 0.9179430347610701, "grad_norm": 1.6444627046585083, "learning_rate": 1.7552861501214935e-07, "loss": 0.7467, "step": 25988 }, { "epoch": 0.917978356564778, "grad_norm": 1.788982629776001, "learning_rate": 1.7537841470973427e-07, "loss": 0.7593, "step": 25989 }, { "epoch": 0.9180136783684859, "grad_norm": 1.4691487550735474, "learning_rate": 1.7522827755105887e-07, "loss": 0.7399, "step": 25990 }, { "epoch": 0.9180490001721938, "grad_norm": 1.6105037927627563, "learning_rate": 1.750782035380888e-07, "loss": 0.7424, "step": 25991 }, { "epoch": 0.9180843219759017, "grad_norm": 1.7344856262207031, "learning_rate": 1.749281926727886e-07, "loss": 0.7742, "step": 25992 }, { "epoch": 0.9181196437796096, "grad_norm": 1.7375800609588623, "learning_rate": 1.7477824495712115e-07, "loss": 0.7626, "step": 25993 }, { "epoch": 0.9181549655833176, "grad_norm": 1.715707778930664, "learning_rate": 1.746283603930493e-07, "loss": 0.7561, "step": 25994 }, { "epoch": 0.9181902873870255, "grad_norm": 1.7025728225708008, "learning_rate": 1.7447853898253376e-07, "loss": 0.753, "step": 25995 }, { "epoch": 0.9182256091907334, "grad_norm": 1.760826587677002, "learning_rate": 1.743287807275368e-07, "loss": 0.7677, "step": 25996 }, { "epoch": 0.9182609309944413, "grad_norm": 1.5993903875350952, "learning_rate": 1.7417908563001684e-07, "loss": 0.7381, "step": 25997 }, { "epoch": 0.9182962527981492, "grad_norm": 1.924871802330017, "learning_rate": 1.7402945369193402e-07, "loss": 0.7631, "step": 25998 }, { "epoch": 0.918331574601857, "grad_norm": 1.5791997909545898, "learning_rate": 1.738798849152473e-07, "loss": 0.7503, "step": 25999 }, { "epoch": 0.9183668964055649, "grad_norm": 1.7685168981552124, "learning_rate": 1.7373037930191238e-07, "loss": 0.792, "step": 26000 }, { "epoch": 0.9184022182092728, "grad_norm": 1.7836378812789917, "learning_rate": 1.7358093685388712e-07, "loss": 0.7625, "step": 26001 }, { "epoch": 0.9184375400129807, "grad_norm": 1.7335115671157837, "learning_rate": 1.7343155757312824e-07, "loss": 0.7632, "step": 26002 }, { "epoch": 0.9184728618166886, "grad_norm": 1.0046327114105225, "learning_rate": 1.7328224146158868e-07, "loss": 0.5679, "step": 26003 }, { "epoch": 0.9185081836203965, "grad_norm": 1.8451781272888184, "learning_rate": 1.731329885212246e-07, "loss": 0.7453, "step": 26004 }, { "epoch": 0.9185435054241045, "grad_norm": 1.6512329578399658, "learning_rate": 1.7298379875398785e-07, "loss": 0.7672, "step": 26005 }, { "epoch": 0.9185788272278124, "grad_norm": 2.026634693145752, "learning_rate": 1.7283467216183292e-07, "loss": 0.7776, "step": 26006 }, { "epoch": 0.9186141490315203, "grad_norm": 1.5561243295669556, "learning_rate": 1.7268560874670936e-07, "loss": 0.7423, "step": 26007 }, { "epoch": 0.9186494708352282, "grad_norm": 1.7652320861816406, "learning_rate": 1.7253660851056953e-07, "loss": 0.7766, "step": 26008 }, { "epoch": 0.9186847926389361, "grad_norm": 1.7527819871902466, "learning_rate": 1.7238767145536295e-07, "loss": 0.7357, "step": 26009 }, { "epoch": 0.918720114442644, "grad_norm": 1.8109124898910522, "learning_rate": 1.7223879758303918e-07, "loss": 0.7476, "step": 26010 }, { "epoch": 0.9187554362463519, "grad_norm": 1.6090309619903564, "learning_rate": 1.7208998689554613e-07, "loss": 0.7511, "step": 26011 }, { "epoch": 0.9187907580500598, "grad_norm": 0.8956757187843323, "learning_rate": 1.719412393948333e-07, "loss": 0.5523, "step": 26012 }, { "epoch": 0.9188260798537677, "grad_norm": 2.016129493713379, "learning_rate": 1.7179255508284475e-07, "loss": 0.7742, "step": 26013 }, { "epoch": 0.9188614016574757, "grad_norm": 1.5578275918960571, "learning_rate": 1.7164393396152723e-07, "loss": 0.7191, "step": 26014 }, { "epoch": 0.9188967234611836, "grad_norm": 1.726021409034729, "learning_rate": 1.714953760328264e-07, "loss": 0.7408, "step": 26015 }, { "epoch": 0.9189320452648915, "grad_norm": 1.7838324308395386, "learning_rate": 1.7134688129868737e-07, "loss": 0.7358, "step": 26016 }, { "epoch": 0.9189673670685994, "grad_norm": 1.6348530054092407, "learning_rate": 1.7119844976105194e-07, "loss": 0.807, "step": 26017 }, { "epoch": 0.9190026888723073, "grad_norm": 1.8344380855560303, "learning_rate": 1.7105008142186298e-07, "loss": 0.7884, "step": 26018 }, { "epoch": 0.9190380106760152, "grad_norm": 10.498212814331055, "learning_rate": 1.7090177628306392e-07, "loss": 0.7483, "step": 26019 }, { "epoch": 0.9190733324797231, "grad_norm": 1.8086190223693848, "learning_rate": 1.707535343465938e-07, "loss": 0.8165, "step": 26020 }, { "epoch": 0.919108654283431, "grad_norm": 1.5839780569076538, "learning_rate": 1.7060535561439384e-07, "loss": 0.7645, "step": 26021 }, { "epoch": 0.9191439760871389, "grad_norm": 1.7938275337219238, "learning_rate": 1.7045724008840358e-07, "loss": 0.7996, "step": 26022 }, { "epoch": 0.9191792978908468, "grad_norm": 1.7000162601470947, "learning_rate": 1.7030918777056094e-07, "loss": 0.768, "step": 26023 }, { "epoch": 0.9192146196945548, "grad_norm": 1.758270502090454, "learning_rate": 1.7016119866280322e-07, "loss": 0.7682, "step": 26024 }, { "epoch": 0.9192499414982626, "grad_norm": 1.8159815073013306, "learning_rate": 1.7001327276706892e-07, "loss": 0.7623, "step": 26025 }, { "epoch": 0.9192852633019705, "grad_norm": 1.7566745281219482, "learning_rate": 1.69865410085292e-07, "loss": 0.7603, "step": 26026 }, { "epoch": 0.9193205851056784, "grad_norm": 1.7277649641036987, "learning_rate": 1.6971761061940872e-07, "loss": 0.7748, "step": 26027 }, { "epoch": 0.9193559069093863, "grad_norm": 1.9437174797058105, "learning_rate": 1.6956987437135364e-07, "loss": 0.7676, "step": 26028 }, { "epoch": 0.9193912287130942, "grad_norm": 1.8391724824905396, "learning_rate": 1.694222013430602e-07, "loss": 0.7908, "step": 26029 }, { "epoch": 0.9194265505168021, "grad_norm": 1.833545446395874, "learning_rate": 1.692745915364602e-07, "loss": 0.8006, "step": 26030 }, { "epoch": 0.91946187232051, "grad_norm": 1.688998818397522, "learning_rate": 1.6912704495348707e-07, "loss": 0.7306, "step": 26031 }, { "epoch": 0.9194971941242179, "grad_norm": 1.6040657758712769, "learning_rate": 1.6897956159607097e-07, "loss": 0.7246, "step": 26032 }, { "epoch": 0.9195325159279258, "grad_norm": 1.6585999727249146, "learning_rate": 1.6883214146614202e-07, "loss": 0.7621, "step": 26033 }, { "epoch": 0.9195678377316338, "grad_norm": 1.870725154876709, "learning_rate": 1.6868478456562974e-07, "loss": 0.7578, "step": 26034 }, { "epoch": 0.9196031595353417, "grad_norm": 1.6343994140625, "learning_rate": 1.685374908964632e-07, "loss": 0.752, "step": 26035 }, { "epoch": 0.9196384813390496, "grad_norm": 1.9582077264785767, "learning_rate": 1.6839026046056917e-07, "loss": 0.8021, "step": 26036 }, { "epoch": 0.9196738031427575, "grad_norm": 1.9581174850463867, "learning_rate": 1.6824309325987553e-07, "loss": 0.7798, "step": 26037 }, { "epoch": 0.9197091249464654, "grad_norm": 1.7041656970977783, "learning_rate": 1.68095989296308e-07, "loss": 0.787, "step": 26038 }, { "epoch": 0.9197444467501733, "grad_norm": 1.8945082426071167, "learning_rate": 1.6794894857179167e-07, "loss": 0.7875, "step": 26039 }, { "epoch": 0.9197797685538812, "grad_norm": 1.6926785707473755, "learning_rate": 1.6780197108825113e-07, "loss": 0.7918, "step": 26040 }, { "epoch": 0.9198150903575891, "grad_norm": 1.6225875616073608, "learning_rate": 1.676550568476104e-07, "loss": 0.7345, "step": 26041 }, { "epoch": 0.919850412161297, "grad_norm": 1.7214144468307495, "learning_rate": 1.6750820585179184e-07, "loss": 0.7902, "step": 26042 }, { "epoch": 0.919885733965005, "grad_norm": 1.6968780755996704, "learning_rate": 1.6736141810271722e-07, "loss": 0.7186, "step": 26043 }, { "epoch": 0.9199210557687129, "grad_norm": 1.9842559099197388, "learning_rate": 1.6721469360230835e-07, "loss": 0.7281, "step": 26044 }, { "epoch": 0.9199563775724208, "grad_norm": 1.5731275081634521, "learning_rate": 1.670680323524848e-07, "loss": 0.7441, "step": 26045 }, { "epoch": 0.9199916993761287, "grad_norm": 1.5368183851242065, "learning_rate": 1.6692143435516616e-07, "loss": 0.7865, "step": 26046 }, { "epoch": 0.9200270211798366, "grad_norm": 1.5849231481552124, "learning_rate": 1.6677489961227145e-07, "loss": 0.7873, "step": 26047 }, { "epoch": 0.9200623429835445, "grad_norm": 1.6660786867141724, "learning_rate": 1.6662842812571855e-07, "loss": 0.7358, "step": 26048 }, { "epoch": 0.9200976647872524, "grad_norm": 1.6594157218933105, "learning_rate": 1.664820198974243e-07, "loss": 0.7282, "step": 26049 }, { "epoch": 0.9201329865909603, "grad_norm": 1.7594047784805298, "learning_rate": 1.6633567492930435e-07, "loss": 0.726, "step": 26050 }, { "epoch": 0.9201683083946681, "grad_norm": 1.5924186706542969, "learning_rate": 1.6618939322327498e-07, "loss": 0.7919, "step": 26051 }, { "epoch": 0.920203630198376, "grad_norm": 1.6855179071426392, "learning_rate": 1.6604317478124908e-07, "loss": 0.7634, "step": 26052 }, { "epoch": 0.9202389520020839, "grad_norm": 1.8447893857955933, "learning_rate": 1.6589701960514237e-07, "loss": 0.7704, "step": 26053 }, { "epoch": 0.9202742738057919, "grad_norm": 1.708626389503479, "learning_rate": 1.6575092769686662e-07, "loss": 0.7738, "step": 26054 }, { "epoch": 0.9203095956094998, "grad_norm": 1.7144309282302856, "learning_rate": 1.6560489905833366e-07, "loss": 0.7509, "step": 26055 }, { "epoch": 0.9203449174132077, "grad_norm": 1.716566562652588, "learning_rate": 1.6545893369145528e-07, "loss": 0.7505, "step": 26056 }, { "epoch": 0.9203802392169156, "grad_norm": 1.7741972208023071, "learning_rate": 1.653130315981416e-07, "loss": 0.749, "step": 26057 }, { "epoch": 0.9204155610206235, "grad_norm": 1.6426738500595093, "learning_rate": 1.6516719278030226e-07, "loss": 0.7516, "step": 26058 }, { "epoch": 0.9204508828243314, "grad_norm": 1.7772771120071411, "learning_rate": 1.6502141723984512e-07, "loss": 0.7473, "step": 26059 }, { "epoch": 0.9204862046280393, "grad_norm": 1.6827552318572998, "learning_rate": 1.6487570497867977e-07, "loss": 0.7964, "step": 26060 }, { "epoch": 0.9205215264317472, "grad_norm": 1.7102408409118652, "learning_rate": 1.6473005599871138e-07, "loss": 0.7121, "step": 26061 }, { "epoch": 0.9205568482354551, "grad_norm": 1.4378358125686646, "learning_rate": 1.6458447030184786e-07, "loss": 0.7302, "step": 26062 }, { "epoch": 0.920592170039163, "grad_norm": 1.728351354598999, "learning_rate": 1.6443894788999327e-07, "loss": 0.7885, "step": 26063 }, { "epoch": 0.920627491842871, "grad_norm": 2.302577257156372, "learning_rate": 1.6429348876505323e-07, "loss": 0.7589, "step": 26064 }, { "epoch": 0.9206628136465789, "grad_norm": 1.7629921436309814, "learning_rate": 1.6414809292893075e-07, "loss": 0.7282, "step": 26065 }, { "epoch": 0.9206981354502868, "grad_norm": 1.8581000566482544, "learning_rate": 1.640027603835287e-07, "loss": 0.735, "step": 26066 }, { "epoch": 0.9207334572539947, "grad_norm": 1.6174073219299316, "learning_rate": 1.6385749113075e-07, "loss": 0.7667, "step": 26067 }, { "epoch": 0.9207687790577026, "grad_norm": 1.5933582782745361, "learning_rate": 1.6371228517249538e-07, "loss": 0.7599, "step": 26068 }, { "epoch": 0.9208041008614105, "grad_norm": 1.6892019510269165, "learning_rate": 1.6356714251066498e-07, "loss": 0.7549, "step": 26069 }, { "epoch": 0.9208394226651184, "grad_norm": 3.419351100921631, "learning_rate": 1.634220631471589e-07, "loss": 0.7889, "step": 26070 }, { "epoch": 0.9208747444688263, "grad_norm": 1.7521320581436157, "learning_rate": 1.6327704708387516e-07, "loss": 0.7316, "step": 26071 }, { "epoch": 0.9209100662725342, "grad_norm": 1.6073085069656372, "learning_rate": 1.631320943227127e-07, "loss": 0.7324, "step": 26072 }, { "epoch": 0.9209453880762422, "grad_norm": 1.718529462814331, "learning_rate": 1.6298720486556786e-07, "loss": 0.709, "step": 26073 }, { "epoch": 0.9209807098799501, "grad_norm": 1.5736218690872192, "learning_rate": 1.628423787143374e-07, "loss": 0.7461, "step": 26074 }, { "epoch": 0.921016031683658, "grad_norm": 1.6923741102218628, "learning_rate": 1.6269761587091647e-07, "loss": 0.768, "step": 26075 }, { "epoch": 0.9210513534873659, "grad_norm": 0.9375555515289307, "learning_rate": 1.625529163371997e-07, "loss": 0.5542, "step": 26076 }, { "epoch": 0.9210866752910737, "grad_norm": 1.6222087144851685, "learning_rate": 1.624082801150817e-07, "loss": 0.7515, "step": 26077 }, { "epoch": 0.9211219970947816, "grad_norm": 1.8353734016418457, "learning_rate": 1.622637072064537e-07, "loss": 0.7294, "step": 26078 }, { "epoch": 0.9211573188984895, "grad_norm": 0.9674031138420105, "learning_rate": 1.621191976132097e-07, "loss": 0.5587, "step": 26079 }, { "epoch": 0.9211926407021974, "grad_norm": 1.8266303539276123, "learning_rate": 1.619747513372405e-07, "loss": 0.7691, "step": 26080 }, { "epoch": 0.9212279625059053, "grad_norm": 1.6809260845184326, "learning_rate": 1.6183036838043508e-07, "loss": 0.7769, "step": 26081 }, { "epoch": 0.9212632843096132, "grad_norm": 1.7718944549560547, "learning_rate": 1.616860487446853e-07, "loss": 0.7894, "step": 26082 }, { "epoch": 0.9212986061133212, "grad_norm": 1.6536651849746704, "learning_rate": 1.6154179243187906e-07, "loss": 0.7535, "step": 26083 }, { "epoch": 0.9213339279170291, "grad_norm": 1.7481586933135986, "learning_rate": 1.6139759944390376e-07, "loss": 0.7722, "step": 26084 }, { "epoch": 0.921369249720737, "grad_norm": 1.5961054563522339, "learning_rate": 1.6125346978264733e-07, "loss": 0.7835, "step": 26085 }, { "epoch": 0.9214045715244449, "grad_norm": 2.1047189235687256, "learning_rate": 1.6110940344999604e-07, "loss": 0.7774, "step": 26086 }, { "epoch": 0.9214398933281528, "grad_norm": 1.897376298904419, "learning_rate": 1.609654004478356e-07, "loss": 0.7688, "step": 26087 }, { "epoch": 0.9214752151318607, "grad_norm": 1.6569441556930542, "learning_rate": 1.6082146077805006e-07, "loss": 0.7982, "step": 26088 }, { "epoch": 0.9215105369355686, "grad_norm": 1.8796969652175903, "learning_rate": 1.6067758444252292e-07, "loss": 0.7223, "step": 26089 }, { "epoch": 0.9215458587392765, "grad_norm": 3.1020023822784424, "learning_rate": 1.6053377144313875e-07, "loss": 0.7645, "step": 26090 }, { "epoch": 0.9215811805429844, "grad_norm": 1.6715387105941772, "learning_rate": 1.6039002178177832e-07, "loss": 0.7238, "step": 26091 }, { "epoch": 0.9216165023466923, "grad_norm": 1.8441115617752075, "learning_rate": 1.6024633546032343e-07, "loss": 0.7524, "step": 26092 }, { "epoch": 0.9216518241504003, "grad_norm": 2.269519805908203, "learning_rate": 1.6010271248065533e-07, "loss": 0.7748, "step": 26093 }, { "epoch": 0.9216871459541082, "grad_norm": 1.5497325658798218, "learning_rate": 1.5995915284465313e-07, "loss": 0.7558, "step": 26094 }, { "epoch": 0.9217224677578161, "grad_norm": 2.5454344749450684, "learning_rate": 1.5981565655419473e-07, "loss": 0.7494, "step": 26095 }, { "epoch": 0.921757789561524, "grad_norm": 2.033474922180176, "learning_rate": 1.596722236111603e-07, "loss": 0.8178, "step": 26096 }, { "epoch": 0.9217931113652319, "grad_norm": 1.6438030004501343, "learning_rate": 1.5952885401742558e-07, "loss": 0.7542, "step": 26097 }, { "epoch": 0.9218284331689398, "grad_norm": 1.5186183452606201, "learning_rate": 1.5938554777486682e-07, "loss": 0.7357, "step": 26098 }, { "epoch": 0.9218637549726477, "grad_norm": 1.523798942565918, "learning_rate": 1.5924230488536085e-07, "loss": 0.7505, "step": 26099 }, { "epoch": 0.9218990767763556, "grad_norm": 2.0920372009277344, "learning_rate": 1.590991253507812e-07, "loss": 0.7793, "step": 26100 }, { "epoch": 0.9219343985800635, "grad_norm": 1.7240673303604126, "learning_rate": 1.5895600917300192e-07, "loss": 0.7445, "step": 26101 }, { "epoch": 0.9219697203837715, "grad_norm": 1.5484668016433716, "learning_rate": 1.5881295635389704e-07, "loss": 0.7619, "step": 26102 }, { "epoch": 0.9220050421874793, "grad_norm": 1.5909221172332764, "learning_rate": 1.5866996689533732e-07, "loss": 0.7707, "step": 26103 }, { "epoch": 0.9220403639911872, "grad_norm": 1.6324578523635864, "learning_rate": 1.5852704079919566e-07, "loss": 0.7618, "step": 26104 }, { "epoch": 0.9220756857948951, "grad_norm": 1.507686734199524, "learning_rate": 1.5838417806734175e-07, "loss": 0.7436, "step": 26105 }, { "epoch": 0.922111007598603, "grad_norm": 1.6774983406066895, "learning_rate": 1.5824137870164624e-07, "loss": 0.7265, "step": 26106 }, { "epoch": 0.9221463294023109, "grad_norm": 1.7034528255462646, "learning_rate": 1.5809864270397656e-07, "loss": 0.7872, "step": 26107 }, { "epoch": 0.9221816512060188, "grad_norm": 1.6688697338104248, "learning_rate": 1.5795597007620177e-07, "loss": 0.7813, "step": 26108 }, { "epoch": 0.9222169730097267, "grad_norm": 1.879863977432251, "learning_rate": 1.5781336082018983e-07, "loss": 0.7512, "step": 26109 }, { "epoch": 0.9222522948134346, "grad_norm": 1.632934808731079, "learning_rate": 1.5767081493780534e-07, "loss": 0.7629, "step": 26110 }, { "epoch": 0.9222876166171425, "grad_norm": 1.7174092531204224, "learning_rate": 1.5752833243091514e-07, "loss": 0.7901, "step": 26111 }, { "epoch": 0.9223229384208504, "grad_norm": 1.6332662105560303, "learning_rate": 1.5738591330138441e-07, "loss": 0.736, "step": 26112 }, { "epoch": 0.9223582602245584, "grad_norm": 1.7237927913665771, "learning_rate": 1.5724355755107612e-07, "loss": 0.7542, "step": 26113 }, { "epoch": 0.9223935820282663, "grad_norm": 1.5586018562316895, "learning_rate": 1.5710126518185375e-07, "loss": 0.7455, "step": 26114 }, { "epoch": 0.9224289038319742, "grad_norm": 1.7795509099960327, "learning_rate": 1.5695903619558027e-07, "loss": 0.7767, "step": 26115 }, { "epoch": 0.9224642256356821, "grad_norm": 2.7689733505249023, "learning_rate": 1.5681687059411587e-07, "loss": 0.802, "step": 26116 }, { "epoch": 0.92249954743939, "grad_norm": 1.7700403928756714, "learning_rate": 1.5667476837932184e-07, "loss": 0.7996, "step": 26117 }, { "epoch": 0.9225348692430979, "grad_norm": 1.946614146232605, "learning_rate": 1.5653272955305832e-07, "loss": 0.7239, "step": 26118 }, { "epoch": 0.9225701910468058, "grad_norm": 1.5910638570785522, "learning_rate": 1.5639075411718385e-07, "loss": 0.7543, "step": 26119 }, { "epoch": 0.9226055128505137, "grad_norm": 3.775601625442505, "learning_rate": 1.562488420735564e-07, "loss": 0.7464, "step": 26120 }, { "epoch": 0.9226408346542216, "grad_norm": 2.051316499710083, "learning_rate": 1.5610699342403392e-07, "loss": 0.7298, "step": 26121 }, { "epoch": 0.9226761564579296, "grad_norm": 1.8588624000549316, "learning_rate": 1.5596520817047268e-07, "loss": 0.765, "step": 26122 }, { "epoch": 0.9227114782616375, "grad_norm": 1.664886713027954, "learning_rate": 1.5582348631472843e-07, "loss": 0.7998, "step": 26123 }, { "epoch": 0.9227468000653454, "grad_norm": 1.8345582485198975, "learning_rate": 1.5568182785865526e-07, "loss": 0.7485, "step": 26124 }, { "epoch": 0.9227821218690533, "grad_norm": 1.766992449760437, "learning_rate": 1.555402328041078e-07, "loss": 0.7556, "step": 26125 }, { "epoch": 0.9228174436727612, "grad_norm": 1.8385626077651978, "learning_rate": 1.5539870115293897e-07, "loss": 0.7726, "step": 26126 }, { "epoch": 0.9228527654764691, "grad_norm": 1.6895097494125366, "learning_rate": 1.5525723290700123e-07, "loss": 0.7756, "step": 26127 }, { "epoch": 0.922888087280177, "grad_norm": 1.769263744354248, "learning_rate": 1.5511582806814696e-07, "loss": 0.742, "step": 26128 }, { "epoch": 0.9229234090838848, "grad_norm": 2.0207324028015137, "learning_rate": 1.5497448663822523e-07, "loss": 0.7621, "step": 26129 }, { "epoch": 0.9229587308875927, "grad_norm": 1.7013134956359863, "learning_rate": 1.5483320861908625e-07, "loss": 0.744, "step": 26130 }, { "epoch": 0.9229940526913006, "grad_norm": 2.0070605278015137, "learning_rate": 1.5469199401258071e-07, "loss": 0.7711, "step": 26131 }, { "epoch": 0.9230293744950085, "grad_norm": 1.9785213470458984, "learning_rate": 1.5455084282055443e-07, "loss": 0.7338, "step": 26132 }, { "epoch": 0.9230646962987165, "grad_norm": 1.6602438688278198, "learning_rate": 1.544097550448559e-07, "loss": 0.7712, "step": 26133 }, { "epoch": 0.9231000181024244, "grad_norm": 2.171300172805786, "learning_rate": 1.5426873068733194e-07, "loss": 0.8002, "step": 26134 }, { "epoch": 0.9231353399061323, "grad_norm": 1.7010637521743774, "learning_rate": 1.5412776974982834e-07, "loss": 0.757, "step": 26135 }, { "epoch": 0.9231706617098402, "grad_norm": 1.685032844543457, "learning_rate": 1.5398687223418862e-07, "loss": 0.7429, "step": 26136 }, { "epoch": 0.9232059835135481, "grad_norm": 1.676351547241211, "learning_rate": 1.5384603814225851e-07, "loss": 0.7604, "step": 26137 }, { "epoch": 0.923241305317256, "grad_norm": 2.0791165828704834, "learning_rate": 1.5370526747588045e-07, "loss": 0.7975, "step": 26138 }, { "epoch": 0.9232766271209639, "grad_norm": 1.618793249130249, "learning_rate": 1.5356456023689625e-07, "loss": 0.7809, "step": 26139 }, { "epoch": 0.9233119489246718, "grad_norm": 1.7569924592971802, "learning_rate": 1.534239164271484e-07, "loss": 0.7971, "step": 26140 }, { "epoch": 0.9233472707283797, "grad_norm": 1.9591041803359985, "learning_rate": 1.5328333604847812e-07, "loss": 0.8122, "step": 26141 }, { "epoch": 0.9233825925320877, "grad_norm": 1.879087209701538, "learning_rate": 1.53142819102724e-07, "loss": 0.7784, "step": 26142 }, { "epoch": 0.9234179143357956, "grad_norm": 1.7030019760131836, "learning_rate": 1.530023655917251e-07, "loss": 0.7545, "step": 26143 }, { "epoch": 0.9234532361395035, "grad_norm": 1.705148696899414, "learning_rate": 1.5286197551731997e-07, "loss": 0.7292, "step": 26144 }, { "epoch": 0.9234885579432114, "grad_norm": 1.873867392539978, "learning_rate": 1.5272164888134655e-07, "loss": 0.773, "step": 26145 }, { "epoch": 0.9235238797469193, "grad_norm": 1.839128017425537, "learning_rate": 1.5258138568564061e-07, "loss": 0.7681, "step": 26146 }, { "epoch": 0.9235592015506272, "grad_norm": 1.6226677894592285, "learning_rate": 1.524411859320385e-07, "loss": 0.7349, "step": 26147 }, { "epoch": 0.9235945233543351, "grad_norm": 1.8114804029464722, "learning_rate": 1.5230104962237535e-07, "loss": 0.7652, "step": 26148 }, { "epoch": 0.923629845158043, "grad_norm": 1.615456223487854, "learning_rate": 1.5216097675848418e-07, "loss": 0.7457, "step": 26149 }, { "epoch": 0.9236651669617509, "grad_norm": 1.6475331783294678, "learning_rate": 1.5202096734219906e-07, "loss": 0.7597, "step": 26150 }, { "epoch": 0.9237004887654588, "grad_norm": 1.9705435037612915, "learning_rate": 1.5188102137535244e-07, "loss": 0.7787, "step": 26151 }, { "epoch": 0.9237358105691668, "grad_norm": 1.7297896146774292, "learning_rate": 1.5174113885977503e-07, "loss": 0.7581, "step": 26152 }, { "epoch": 0.9237711323728747, "grad_norm": 1.6978336572647095, "learning_rate": 1.5160131979729875e-07, "loss": 0.7749, "step": 26153 }, { "epoch": 0.9238064541765826, "grad_norm": 1.6927127838134766, "learning_rate": 1.514615641897532e-07, "loss": 0.7655, "step": 26154 }, { "epoch": 0.9238417759802904, "grad_norm": 1.7256101369857788, "learning_rate": 1.5132187203896642e-07, "loss": 0.7836, "step": 26155 }, { "epoch": 0.9238770977839983, "grad_norm": 1.677894115447998, "learning_rate": 1.51182243346768e-07, "loss": 0.8219, "step": 26156 }, { "epoch": 0.9239124195877062, "grad_norm": 1.771924376487732, "learning_rate": 1.5104267811498542e-07, "loss": 0.7337, "step": 26157 }, { "epoch": 0.9239477413914141, "grad_norm": 1.7670650482177734, "learning_rate": 1.5090317634544438e-07, "loss": 0.769, "step": 26158 }, { "epoch": 0.923983063195122, "grad_norm": 1.8008055686950684, "learning_rate": 1.5076373803997068e-07, "loss": 0.7658, "step": 26159 }, { "epoch": 0.9240183849988299, "grad_norm": 1.485221028327942, "learning_rate": 1.5062436320039008e-07, "loss": 0.7495, "step": 26160 }, { "epoch": 0.9240537068025378, "grad_norm": 1.6770811080932617, "learning_rate": 1.504850518285267e-07, "loss": 0.7712, "step": 26161 }, { "epoch": 0.9240890286062458, "grad_norm": 1.8927146196365356, "learning_rate": 1.5034580392620235e-07, "loss": 0.8208, "step": 26162 }, { "epoch": 0.9241243504099537, "grad_norm": 1.9568703174591064, "learning_rate": 1.5020661949524061e-07, "loss": 0.771, "step": 26163 }, { "epoch": 0.9241596722136616, "grad_norm": 1.7133716344833374, "learning_rate": 1.500674985374634e-07, "loss": 0.7517, "step": 26164 }, { "epoch": 0.9241949940173695, "grad_norm": 1.8045037984848022, "learning_rate": 1.4992844105469086e-07, "loss": 0.7743, "step": 26165 }, { "epoch": 0.9242303158210774, "grad_norm": 1.7284575700759888, "learning_rate": 1.4978944704874264e-07, "loss": 0.7647, "step": 26166 }, { "epoch": 0.9242656376247853, "grad_norm": 1.9229223728179932, "learning_rate": 1.4965051652143958e-07, "loss": 0.7848, "step": 26167 }, { "epoch": 0.9243009594284932, "grad_norm": 1.7031322717666626, "learning_rate": 1.4951164947459795e-07, "loss": 0.815, "step": 26168 }, { "epoch": 0.9243362812322011, "grad_norm": 1.6631964445114136, "learning_rate": 1.493728459100363e-07, "loss": 0.7587, "step": 26169 }, { "epoch": 0.924371603035909, "grad_norm": 1.7274893522262573, "learning_rate": 1.4923410582957097e-07, "loss": 0.7578, "step": 26170 }, { "epoch": 0.924406924839617, "grad_norm": 1.6540284156799316, "learning_rate": 1.4909542923501773e-07, "loss": 0.7196, "step": 26171 }, { "epoch": 0.9244422466433249, "grad_norm": 1.95914888381958, "learning_rate": 1.489568161281918e-07, "loss": 0.802, "step": 26172 }, { "epoch": 0.9244775684470328, "grad_norm": 1.686979055404663, "learning_rate": 1.4881826651090726e-07, "loss": 0.7417, "step": 26173 }, { "epoch": 0.9245128902507407, "grad_norm": 1.7096481323242188, "learning_rate": 1.486797803849771e-07, "loss": 0.7422, "step": 26174 }, { "epoch": 0.9245482120544486, "grad_norm": 1.86074960231781, "learning_rate": 1.4854135775221378e-07, "loss": 0.7973, "step": 26175 }, { "epoch": 0.9245835338581565, "grad_norm": 1.6700869798660278, "learning_rate": 1.4840299861442976e-07, "loss": 0.7702, "step": 26176 }, { "epoch": 0.9246188556618644, "grad_norm": 1.7536444664001465, "learning_rate": 1.482647029734352e-07, "loss": 0.7865, "step": 26177 }, { "epoch": 0.9246541774655723, "grad_norm": 1.606791615486145, "learning_rate": 1.4812647083103982e-07, "loss": 0.7576, "step": 26178 }, { "epoch": 0.9246894992692802, "grad_norm": 1.6470427513122559, "learning_rate": 1.479883021890538e-07, "loss": 0.7581, "step": 26179 }, { "epoch": 0.9247248210729881, "grad_norm": 1.6118026971817017, "learning_rate": 1.478501970492846e-07, "loss": 0.7297, "step": 26180 }, { "epoch": 0.924760142876696, "grad_norm": 1.6143410205841064, "learning_rate": 1.4771215541353967e-07, "loss": 0.7298, "step": 26181 }, { "epoch": 0.9247954646804039, "grad_norm": 1.9162524938583374, "learning_rate": 1.4757417728362533e-07, "loss": 0.745, "step": 26182 }, { "epoch": 0.9248307864841118, "grad_norm": 1.7585501670837402, "learning_rate": 1.4743626266134903e-07, "loss": 0.7687, "step": 26183 }, { "epoch": 0.9248661082878197, "grad_norm": 1.722681999206543, "learning_rate": 1.4729841154851433e-07, "loss": 0.7547, "step": 26184 }, { "epoch": 0.9249014300915276, "grad_norm": 1.8751314878463745, "learning_rate": 1.4716062394692533e-07, "loss": 0.784, "step": 26185 }, { "epoch": 0.9249367518952355, "grad_norm": 1.8460502624511719, "learning_rate": 1.470228998583867e-07, "loss": 0.7811, "step": 26186 }, { "epoch": 0.9249720736989434, "grad_norm": 1.8944165706634521, "learning_rate": 1.4688523928469977e-07, "loss": 0.7364, "step": 26187 }, { "epoch": 0.9250073955026513, "grad_norm": 2.049107789993286, "learning_rate": 1.4674764222766592e-07, "loss": 0.7623, "step": 26188 }, { "epoch": 0.9250427173063592, "grad_norm": 1.8061256408691406, "learning_rate": 1.4661010868908754e-07, "loss": 0.7575, "step": 26189 }, { "epoch": 0.9250780391100671, "grad_norm": 1.6800212860107422, "learning_rate": 1.4647263867076322e-07, "loss": 0.7356, "step": 26190 }, { "epoch": 0.925113360913775, "grad_norm": 2.0817768573760986, "learning_rate": 1.4633523217449264e-07, "loss": 0.7381, "step": 26191 }, { "epoch": 0.925148682717483, "grad_norm": 1.6022241115570068, "learning_rate": 1.4619788920207434e-07, "loss": 0.7841, "step": 26192 }, { "epoch": 0.9251840045211909, "grad_norm": 1.5863455533981323, "learning_rate": 1.4606060975530578e-07, "loss": 0.7387, "step": 26193 }, { "epoch": 0.9252193263248988, "grad_norm": 1.5223912000656128, "learning_rate": 1.459233938359833e-07, "loss": 0.7436, "step": 26194 }, { "epoch": 0.9252546481286067, "grad_norm": 1.696445107460022, "learning_rate": 1.457862414459027e-07, "loss": 0.7704, "step": 26195 }, { "epoch": 0.9252899699323146, "grad_norm": 1.6430386304855347, "learning_rate": 1.4564915258685974e-07, "loss": 0.7847, "step": 26196 }, { "epoch": 0.9253252917360225, "grad_norm": 0.9666505455970764, "learning_rate": 1.4551212726064855e-07, "loss": 0.551, "step": 26197 }, { "epoch": 0.9253606135397304, "grad_norm": 1.8034093379974365, "learning_rate": 1.453751654690616e-07, "loss": 0.7802, "step": 26198 }, { "epoch": 0.9253959353434383, "grad_norm": 1.6151889562606812, "learning_rate": 1.452382672138919e-07, "loss": 0.7619, "step": 26199 }, { "epoch": 0.9254312571471462, "grad_norm": 1.6003915071487427, "learning_rate": 1.4510143249693075e-07, "loss": 0.7457, "step": 26200 }, { "epoch": 0.9254665789508542, "grad_norm": 1.8291411399841309, "learning_rate": 1.449646613199701e-07, "loss": 0.7969, "step": 26201 }, { "epoch": 0.9255019007545621, "grad_norm": 1.5899477005004883, "learning_rate": 1.4482795368479852e-07, "loss": 0.7761, "step": 26202 }, { "epoch": 0.92553722255827, "grad_norm": 1.7157245874404907, "learning_rate": 1.4469130959320676e-07, "loss": 0.7649, "step": 26203 }, { "epoch": 0.9255725443619779, "grad_norm": 2.2760818004608154, "learning_rate": 1.445547290469823e-07, "loss": 0.7834, "step": 26204 }, { "epoch": 0.9256078661656858, "grad_norm": 1.7307205200195312, "learning_rate": 1.4441821204791262e-07, "loss": 0.7703, "step": 26205 }, { "epoch": 0.9256431879693937, "grad_norm": 1.7264939546585083, "learning_rate": 1.4428175859778515e-07, "loss": 0.7411, "step": 26206 }, { "epoch": 0.9256785097731015, "grad_norm": 2.335038423538208, "learning_rate": 1.4414536869838458e-07, "loss": 0.723, "step": 26207 }, { "epoch": 0.9257138315768094, "grad_norm": 1.6002230644226074, "learning_rate": 1.440090423514967e-07, "loss": 0.7704, "step": 26208 }, { "epoch": 0.9257491533805173, "grad_norm": 1.7181285619735718, "learning_rate": 1.4387277955890622e-07, "loss": 0.7666, "step": 26209 }, { "epoch": 0.9257844751842252, "grad_norm": 2.4467546939849854, "learning_rate": 1.43736580322395e-07, "loss": 0.7614, "step": 26210 }, { "epoch": 0.9258197969879332, "grad_norm": 1.5831354856491089, "learning_rate": 1.4360044464374722e-07, "loss": 0.7682, "step": 26211 }, { "epoch": 0.9258551187916411, "grad_norm": 3.7802746295928955, "learning_rate": 1.434643725247442e-07, "loss": 0.767, "step": 26212 }, { "epoch": 0.925890440595349, "grad_norm": 1.6854034662246704, "learning_rate": 1.4332836396716566e-07, "loss": 0.7783, "step": 26213 }, { "epoch": 0.9259257623990569, "grad_norm": 1.7596211433410645, "learning_rate": 1.431924189727929e-07, "loss": 0.7315, "step": 26214 }, { "epoch": 0.9259610842027648, "grad_norm": 1.728769302368164, "learning_rate": 1.430565375434051e-07, "loss": 0.7932, "step": 26215 }, { "epoch": 0.9259964060064727, "grad_norm": 1.5939518213272095, "learning_rate": 1.4292071968078081e-07, "loss": 0.7957, "step": 26216 }, { "epoch": 0.9260317278101806, "grad_norm": 1.503787636756897, "learning_rate": 1.4278496538669584e-07, "loss": 0.7734, "step": 26217 }, { "epoch": 0.9260670496138885, "grad_norm": 1.9695807695388794, "learning_rate": 1.4264927466292878e-07, "loss": 0.7333, "step": 26218 }, { "epoch": 0.9261023714175964, "grad_norm": 2.69909405708313, "learning_rate": 1.425136475112554e-07, "loss": 0.7472, "step": 26219 }, { "epoch": 0.9261376932213043, "grad_norm": 1.6573745012283325, "learning_rate": 1.4237808393344933e-07, "loss": 0.7775, "step": 26220 }, { "epoch": 0.9261730150250123, "grad_norm": 1.7950646877288818, "learning_rate": 1.4224258393128632e-07, "loss": 0.8265, "step": 26221 }, { "epoch": 0.9262083368287202, "grad_norm": 1.59308922290802, "learning_rate": 1.4210714750653943e-07, "loss": 0.7161, "step": 26222 }, { "epoch": 0.9262436586324281, "grad_norm": 1.6210275888442993, "learning_rate": 1.4197177466098054e-07, "loss": 0.7471, "step": 26223 }, { "epoch": 0.926278980436136, "grad_norm": 0.9674255847930908, "learning_rate": 1.4183646539638163e-07, "loss": 0.6015, "step": 26224 }, { "epoch": 0.9263143022398439, "grad_norm": 1.9015367031097412, "learning_rate": 1.4170121971451457e-07, "loss": 0.7469, "step": 26225 }, { "epoch": 0.9263496240435518, "grad_norm": 2.1775763034820557, "learning_rate": 1.4156603761714848e-07, "loss": 0.7598, "step": 26226 }, { "epoch": 0.9263849458472597, "grad_norm": 1.821469783782959, "learning_rate": 1.41430919106052e-07, "loss": 0.7736, "step": 26227 }, { "epoch": 0.9264202676509676, "grad_norm": 1.8447766304016113, "learning_rate": 1.4129586418299533e-07, "loss": 0.7768, "step": 26228 }, { "epoch": 0.9264555894546755, "grad_norm": 1.6504956483840942, "learning_rate": 1.411608728497449e-07, "loss": 0.7628, "step": 26229 }, { "epoch": 0.9264909112583835, "grad_norm": 2.5392208099365234, "learning_rate": 1.41025945108067e-07, "loss": 0.7699, "step": 26230 }, { "epoch": 0.9265262330620914, "grad_norm": 1.675794243812561, "learning_rate": 1.4089108095972913e-07, "loss": 0.745, "step": 26231 }, { "epoch": 0.9265615548657993, "grad_norm": 1.6642297506332397, "learning_rate": 1.4075628040649437e-07, "loss": 0.7294, "step": 26232 }, { "epoch": 0.9265968766695071, "grad_norm": 1.8754260540008545, "learning_rate": 1.4062154345012846e-07, "loss": 0.7279, "step": 26233 }, { "epoch": 0.926632198473215, "grad_norm": 1.67411470413208, "learning_rate": 1.4048687009239447e-07, "loss": 0.7882, "step": 26234 }, { "epoch": 0.9266675202769229, "grad_norm": 1.611330270767212, "learning_rate": 1.403522603350549e-07, "loss": 0.7396, "step": 26235 }, { "epoch": 0.9267028420806308, "grad_norm": 1.8183308839797974, "learning_rate": 1.4021771417987106e-07, "loss": 0.7539, "step": 26236 }, { "epoch": 0.9267381638843387, "grad_norm": 1.8040162324905396, "learning_rate": 1.4008323162860437e-07, "loss": 0.7401, "step": 26237 }, { "epoch": 0.9267734856880466, "grad_norm": 1.54576575756073, "learning_rate": 1.3994881268301509e-07, "loss": 0.7388, "step": 26238 }, { "epoch": 0.9268088074917545, "grad_norm": 1.774498462677002, "learning_rate": 1.3981445734486176e-07, "loss": 0.7448, "step": 26239 }, { "epoch": 0.9268441292954624, "grad_norm": 1.825918436050415, "learning_rate": 1.3968016561590302e-07, "loss": 0.7546, "step": 26240 }, { "epoch": 0.9268794510991704, "grad_norm": 2.124725103378296, "learning_rate": 1.3954593749789746e-07, "loss": 0.7418, "step": 26241 }, { "epoch": 0.9269147729028783, "grad_norm": 1.7117735147476196, "learning_rate": 1.3941177299260035e-07, "loss": 0.7923, "step": 26242 }, { "epoch": 0.9269500947065862, "grad_norm": 2.0435609817504883, "learning_rate": 1.39277672101768e-07, "loss": 0.7515, "step": 26243 }, { "epoch": 0.9269854165102941, "grad_norm": 1.7338242530822754, "learning_rate": 1.3914363482715633e-07, "loss": 0.7946, "step": 26244 }, { "epoch": 0.927020738314002, "grad_norm": 1.7687727212905884, "learning_rate": 1.3900966117051884e-07, "loss": 0.7819, "step": 26245 }, { "epoch": 0.9270560601177099, "grad_norm": 1.6647193431854248, "learning_rate": 1.3887575113360862e-07, "loss": 0.7659, "step": 26246 }, { "epoch": 0.9270913819214178, "grad_norm": 1.6312994956970215, "learning_rate": 1.3874190471817984e-07, "loss": 0.7783, "step": 26247 }, { "epoch": 0.9271267037251257, "grad_norm": 1.525533676147461, "learning_rate": 1.3860812192598217e-07, "loss": 0.7224, "step": 26248 }, { "epoch": 0.9271620255288336, "grad_norm": 1.8084818124771118, "learning_rate": 1.3847440275876812e-07, "loss": 0.7637, "step": 26249 }, { "epoch": 0.9271973473325416, "grad_norm": 1.729163646697998, "learning_rate": 1.3834074721828684e-07, "loss": 0.7502, "step": 26250 }, { "epoch": 0.9272326691362495, "grad_norm": 1.9508318901062012, "learning_rate": 1.382071553062886e-07, "loss": 0.7612, "step": 26251 }, { "epoch": 0.9272679909399574, "grad_norm": 2.0914783477783203, "learning_rate": 1.3807362702452144e-07, "loss": 0.7805, "step": 26252 }, { "epoch": 0.9273033127436653, "grad_norm": 1.785444974899292, "learning_rate": 1.3794016237473173e-07, "loss": 0.776, "step": 26253 }, { "epoch": 0.9273386345473732, "grad_norm": 1.7973501682281494, "learning_rate": 1.378067613586681e-07, "loss": 0.7509, "step": 26254 }, { "epoch": 0.9273739563510811, "grad_norm": 1.824502944946289, "learning_rate": 1.376734239780747e-07, "loss": 0.7495, "step": 26255 }, { "epoch": 0.927409278154789, "grad_norm": 1.8284945487976074, "learning_rate": 1.3754015023469791e-07, "loss": 0.7661, "step": 26256 }, { "epoch": 0.9274445999584969, "grad_norm": 1.683701753616333, "learning_rate": 1.3740694013028187e-07, "loss": 0.7827, "step": 26257 }, { "epoch": 0.9274799217622048, "grad_norm": 1.6849297285079956, "learning_rate": 1.3727379366656912e-07, "loss": 0.7517, "step": 26258 }, { "epoch": 0.9275152435659126, "grad_norm": 1.8478403091430664, "learning_rate": 1.3714071084530267e-07, "loss": 0.7822, "step": 26259 }, { "epoch": 0.9275505653696205, "grad_norm": 1.6738430261611938, "learning_rate": 1.3700769166822503e-07, "loss": 0.7757, "step": 26260 }, { "epoch": 0.9275858871733285, "grad_norm": 1.9927377700805664, "learning_rate": 1.3687473613707702e-07, "loss": 0.732, "step": 26261 }, { "epoch": 0.9276212089770364, "grad_norm": 1.6123647689819336, "learning_rate": 1.3674184425359725e-07, "loss": 0.7482, "step": 26262 }, { "epoch": 0.9276565307807443, "grad_norm": 1.8609496355056763, "learning_rate": 1.3660901601952658e-07, "loss": 0.7466, "step": 26263 }, { "epoch": 0.9276918525844522, "grad_norm": 1.6404709815979004, "learning_rate": 1.3647625143660305e-07, "loss": 0.7192, "step": 26264 }, { "epoch": 0.9277271743881601, "grad_norm": 1.7747269868850708, "learning_rate": 1.363435505065641e-07, "loss": 0.7963, "step": 26265 }, { "epoch": 0.927762496191868, "grad_norm": 1.7605907917022705, "learning_rate": 1.3621091323114622e-07, "loss": 0.7709, "step": 26266 }, { "epoch": 0.9277978179955759, "grad_norm": 1.708150029182434, "learning_rate": 1.3607833961208572e-07, "loss": 0.7597, "step": 26267 }, { "epoch": 0.9278331397992838, "grad_norm": 1.7241201400756836, "learning_rate": 1.3594582965111736e-07, "loss": 0.7694, "step": 26268 }, { "epoch": 0.9278684616029917, "grad_norm": 1.7752761840820312, "learning_rate": 1.3581338334997584e-07, "loss": 0.7786, "step": 26269 }, { "epoch": 0.9279037834066997, "grad_norm": 1.737970232963562, "learning_rate": 1.3568100071039482e-07, "loss": 0.7504, "step": 26270 }, { "epoch": 0.9279391052104076, "grad_norm": 1.7102880477905273, "learning_rate": 1.3554868173410617e-07, "loss": 0.787, "step": 26271 }, { "epoch": 0.9279744270141155, "grad_norm": 1.497296690940857, "learning_rate": 1.3541642642284193e-07, "loss": 0.739, "step": 26272 }, { "epoch": 0.9280097488178234, "grad_norm": 2.8192906379699707, "learning_rate": 1.3528423477833287e-07, "loss": 0.7627, "step": 26273 }, { "epoch": 0.9280450706215313, "grad_norm": 1.6666882038116455, "learning_rate": 1.3515210680230982e-07, "loss": 0.7699, "step": 26274 }, { "epoch": 0.9280803924252392, "grad_norm": 1.7702428102493286, "learning_rate": 1.350200424965009e-07, "loss": 0.7694, "step": 26275 }, { "epoch": 0.9281157142289471, "grad_norm": 1.6876256465911865, "learning_rate": 1.3488804186263527e-07, "loss": 0.7613, "step": 26276 }, { "epoch": 0.928151036032655, "grad_norm": 1.7520750761032104, "learning_rate": 1.347561049024404e-07, "loss": 0.7678, "step": 26277 }, { "epoch": 0.9281863578363629, "grad_norm": 1.8666441440582275, "learning_rate": 1.3462423161764326e-07, "loss": 0.7999, "step": 26278 }, { "epoch": 0.9282216796400709, "grad_norm": 1.5982732772827148, "learning_rate": 1.3449242200996916e-07, "loss": 0.7617, "step": 26279 }, { "epoch": 0.9282570014437788, "grad_norm": 1.7065722942352295, "learning_rate": 1.3436067608114444e-07, "loss": 0.7601, "step": 26280 }, { "epoch": 0.9282923232474867, "grad_norm": 1.6272764205932617, "learning_rate": 1.3422899383289168e-07, "loss": 0.7462, "step": 26281 }, { "epoch": 0.9283276450511946, "grad_norm": 1.767781376838684, "learning_rate": 1.3409737526693555e-07, "loss": 0.7581, "step": 26282 }, { "epoch": 0.9283629668549025, "grad_norm": 1.7217680215835571, "learning_rate": 1.339658203849986e-07, "loss": 0.7746, "step": 26283 }, { "epoch": 0.9283982886586104, "grad_norm": 1.6758097410202026, "learning_rate": 1.338343291888017e-07, "loss": 0.7579, "step": 26284 }, { "epoch": 0.9284336104623182, "grad_norm": 1.626785159111023, "learning_rate": 1.3370290168006616e-07, "loss": 0.7456, "step": 26285 }, { "epoch": 0.9284689322660261, "grad_norm": 1.5599981546401978, "learning_rate": 1.3357153786051346e-07, "loss": 0.7506, "step": 26286 }, { "epoch": 0.928504254069734, "grad_norm": 1.689471960067749, "learning_rate": 1.3344023773186055e-07, "loss": 0.7566, "step": 26287 }, { "epoch": 0.9285395758734419, "grad_norm": 2.46329402923584, "learning_rate": 1.3330900129582713e-07, "loss": 0.7697, "step": 26288 }, { "epoch": 0.9285748976771498, "grad_norm": 2.064275026321411, "learning_rate": 1.331778285541313e-07, "loss": 0.7788, "step": 26289 }, { "epoch": 0.9286102194808578, "grad_norm": 1.606336236000061, "learning_rate": 1.3304671950848947e-07, "loss": 0.7583, "step": 26290 }, { "epoch": 0.9286455412845657, "grad_norm": 2.01444411277771, "learning_rate": 1.3291567416061635e-07, "loss": 0.7387, "step": 26291 }, { "epoch": 0.9286808630882736, "grad_norm": 1.719683051109314, "learning_rate": 1.327846925122278e-07, "loss": 0.7468, "step": 26292 }, { "epoch": 0.9287161848919815, "grad_norm": 2.0335729122161865, "learning_rate": 1.3265377456503913e-07, "loss": 0.7505, "step": 26293 }, { "epoch": 0.9287515066956894, "grad_norm": 3.18585467338562, "learning_rate": 1.325229203207623e-07, "loss": 0.7469, "step": 26294 }, { "epoch": 0.9287868284993973, "grad_norm": 1.673546552658081, "learning_rate": 1.3239212978111094e-07, "loss": 0.7471, "step": 26295 }, { "epoch": 0.9288221503031052, "grad_norm": 1.6045386791229248, "learning_rate": 1.3226140294779643e-07, "loss": 0.7586, "step": 26296 }, { "epoch": 0.9288574721068131, "grad_norm": 1.7526662349700928, "learning_rate": 1.3213073982252912e-07, "loss": 0.7338, "step": 26297 }, { "epoch": 0.928892793910521, "grad_norm": 1.6964023113250732, "learning_rate": 1.3200014040701981e-07, "loss": 0.7525, "step": 26298 }, { "epoch": 0.928928115714229, "grad_norm": 1.6170547008514404, "learning_rate": 1.318696047029777e-07, "loss": 0.782, "step": 26299 }, { "epoch": 0.9289634375179369, "grad_norm": 1.6925479173660278, "learning_rate": 1.317391327121109e-07, "loss": 0.7475, "step": 26300 }, { "epoch": 0.9289987593216448, "grad_norm": 1.6967681646347046, "learning_rate": 1.3160872443612694e-07, "loss": 0.7622, "step": 26301 }, { "epoch": 0.9290340811253527, "grad_norm": 1.8355783224105835, "learning_rate": 1.3147837987673385e-07, "loss": 0.758, "step": 26302 }, { "epoch": 0.9290694029290606, "grad_norm": 1.5872572660446167, "learning_rate": 1.3134809903563582e-07, "loss": 0.754, "step": 26303 }, { "epoch": 0.9291047247327685, "grad_norm": 1.8061163425445557, "learning_rate": 1.3121788191453821e-07, "loss": 0.7573, "step": 26304 }, { "epoch": 0.9291400465364764, "grad_norm": 1.6654407978057861, "learning_rate": 1.3108772851514684e-07, "loss": 0.7855, "step": 26305 }, { "epoch": 0.9291753683401843, "grad_norm": 1.5816709995269775, "learning_rate": 1.3095763883916312e-07, "loss": 0.7314, "step": 26306 }, { "epoch": 0.9292106901438922, "grad_norm": 1.701985239982605, "learning_rate": 1.308276128882907e-07, "loss": 0.7172, "step": 26307 }, { "epoch": 0.9292460119476001, "grad_norm": 1.0130293369293213, "learning_rate": 1.3069765066423156e-07, "loss": 0.5764, "step": 26308 }, { "epoch": 0.9292813337513081, "grad_norm": 1.6064907312393188, "learning_rate": 1.3056775216868657e-07, "loss": 0.7456, "step": 26309 }, { "epoch": 0.929316655555016, "grad_norm": 1.9337888956069946, "learning_rate": 1.3043791740335488e-07, "loss": 0.7624, "step": 26310 }, { "epoch": 0.9293519773587238, "grad_norm": 1.7331862449645996, "learning_rate": 1.3030814636993627e-07, "loss": 0.8151, "step": 26311 }, { "epoch": 0.9293872991624317, "grad_norm": 1.8098407983779907, "learning_rate": 1.301784390701294e-07, "loss": 0.7755, "step": 26312 }, { "epoch": 0.9294226209661396, "grad_norm": 1.6963069438934326, "learning_rate": 1.3004879550563176e-07, "loss": 0.7454, "step": 26313 }, { "epoch": 0.9294579427698475, "grad_norm": 1.7312995195388794, "learning_rate": 1.2991921567813982e-07, "loss": 0.7518, "step": 26314 }, { "epoch": 0.9294932645735554, "grad_norm": 1.5795851945877075, "learning_rate": 1.2978969958934995e-07, "loss": 0.7466, "step": 26315 }, { "epoch": 0.9295285863772633, "grad_norm": 1.6945388317108154, "learning_rate": 1.2966024724095693e-07, "loss": 0.7798, "step": 26316 }, { "epoch": 0.9295639081809712, "grad_norm": 2.672436237335205, "learning_rate": 1.2953085863465498e-07, "loss": 0.7621, "step": 26317 }, { "epoch": 0.9295992299846791, "grad_norm": 1.7500417232513428, "learning_rate": 1.2940153377213717e-07, "loss": 0.7469, "step": 26318 }, { "epoch": 0.929634551788387, "grad_norm": 1.9226410388946533, "learning_rate": 1.2927227265509767e-07, "loss": 0.7914, "step": 26319 }, { "epoch": 0.929669873592095, "grad_norm": 1.8730086088180542, "learning_rate": 1.2914307528522573e-07, "loss": 0.7633, "step": 26320 }, { "epoch": 0.9297051953958029, "grad_norm": 1.6471803188323975, "learning_rate": 1.2901394166421443e-07, "loss": 0.7653, "step": 26321 }, { "epoch": 0.9297405171995108, "grad_norm": 1.737306833267212, "learning_rate": 1.2888487179375297e-07, "loss": 0.7173, "step": 26322 }, { "epoch": 0.9297758390032187, "grad_norm": 1.721166968345642, "learning_rate": 1.2875586567553056e-07, "loss": 0.7963, "step": 26323 }, { "epoch": 0.9298111608069266, "grad_norm": 1.5988962650299072, "learning_rate": 1.2862692331123528e-07, "loss": 0.7433, "step": 26324 }, { "epoch": 0.9298464826106345, "grad_norm": 1.635015606880188, "learning_rate": 1.2849804470255577e-07, "loss": 0.7424, "step": 26325 }, { "epoch": 0.9298818044143424, "grad_norm": 1.8246042728424072, "learning_rate": 1.283692298511785e-07, "loss": 0.7404, "step": 26326 }, { "epoch": 0.9299171262180503, "grad_norm": 1.6800949573516846, "learning_rate": 1.2824047875878765e-07, "loss": 0.7791, "step": 26327 }, { "epoch": 0.9299524480217582, "grad_norm": 1.7954827547073364, "learning_rate": 1.2811179142707019e-07, "loss": 0.7709, "step": 26328 }, { "epoch": 0.9299877698254662, "grad_norm": 1.6195125579833984, "learning_rate": 1.2798316785771037e-07, "loss": 0.7419, "step": 26329 }, { "epoch": 0.9300230916291741, "grad_norm": 1.9565342664718628, "learning_rate": 1.2785460805239014e-07, "loss": 0.7627, "step": 26330 }, { "epoch": 0.930058413432882, "grad_norm": 1.8393070697784424, "learning_rate": 1.2772611201279316e-07, "loss": 0.7756, "step": 26331 }, { "epoch": 0.9300937352365899, "grad_norm": 2.0321335792541504, "learning_rate": 1.2759767974060143e-07, "loss": 0.76, "step": 26332 }, { "epoch": 0.9301290570402978, "grad_norm": 1.5135571956634521, "learning_rate": 1.2746931123749473e-07, "loss": 0.7483, "step": 26333 }, { "epoch": 0.9301643788440057, "grad_norm": 1.5680782794952393, "learning_rate": 1.273410065051539e-07, "loss": 0.7394, "step": 26334 }, { "epoch": 0.9301997006477136, "grad_norm": 1.6670876741409302, "learning_rate": 1.2721276554525876e-07, "loss": 0.7487, "step": 26335 }, { "epoch": 0.9302350224514215, "grad_norm": 2.102661371231079, "learning_rate": 1.2708458835948623e-07, "loss": 0.7526, "step": 26336 }, { "epoch": 0.9302703442551293, "grad_norm": 1.696564793586731, "learning_rate": 1.2695647494951447e-07, "loss": 0.7411, "step": 26337 }, { "epoch": 0.9303056660588372, "grad_norm": 1.9051628112792969, "learning_rate": 1.26828425317021e-07, "loss": 0.7556, "step": 26338 }, { "epoch": 0.9303409878625452, "grad_norm": 1.8738197088241577, "learning_rate": 1.2670043946368004e-07, "loss": 0.7636, "step": 26339 }, { "epoch": 0.9303763096662531, "grad_norm": 1.5787769556045532, "learning_rate": 1.2657251739116805e-07, "loss": 0.7294, "step": 26340 }, { "epoch": 0.930411631469961, "grad_norm": 1.589779019355774, "learning_rate": 1.2644465910115977e-07, "loss": 0.7575, "step": 26341 }, { "epoch": 0.9304469532736689, "grad_norm": 1.6729559898376465, "learning_rate": 1.2631686459532667e-07, "loss": 0.7769, "step": 26342 }, { "epoch": 0.9304822750773768, "grad_norm": 1.6950174570083618, "learning_rate": 1.2618913387534238e-07, "loss": 0.7688, "step": 26343 }, { "epoch": 0.9305175968810847, "grad_norm": 1.6134705543518066, "learning_rate": 1.260614669428789e-07, "loss": 0.695, "step": 26344 }, { "epoch": 0.9305529186847926, "grad_norm": 1.9284056425094604, "learning_rate": 1.2593386379960715e-07, "loss": 0.7492, "step": 26345 }, { "epoch": 0.9305882404885005, "grad_norm": 1.910807490348816, "learning_rate": 1.2580632444719575e-07, "loss": 0.763, "step": 26346 }, { "epoch": 0.9306235622922084, "grad_norm": 1.9881725311279297, "learning_rate": 1.2567884888731508e-07, "loss": 0.7541, "step": 26347 }, { "epoch": 0.9306588840959163, "grad_norm": 1.8321547508239746, "learning_rate": 1.2555143712163375e-07, "loss": 0.7794, "step": 26348 }, { "epoch": 0.9306942058996243, "grad_norm": 1.9698024988174438, "learning_rate": 1.254240891518188e-07, "loss": 0.7631, "step": 26349 }, { "epoch": 0.9307295277033322, "grad_norm": 1.9205456972122192, "learning_rate": 1.2529680497953666e-07, "loss": 0.7457, "step": 26350 }, { "epoch": 0.9307648495070401, "grad_norm": 1.6822576522827148, "learning_rate": 1.2516958460645434e-07, "loss": 0.7767, "step": 26351 }, { "epoch": 0.930800171310748, "grad_norm": 1.7859162092208862, "learning_rate": 1.2504242803423605e-07, "loss": 0.7598, "step": 26352 }, { "epoch": 0.9308354931144559, "grad_norm": 1.7562799453735352, "learning_rate": 1.2491533526454546e-07, "loss": 0.7746, "step": 26353 }, { "epoch": 0.9308708149181638, "grad_norm": 1.6303256750106812, "learning_rate": 1.2478830629904683e-07, "loss": 0.7703, "step": 26354 }, { "epoch": 0.9309061367218717, "grad_norm": 1.6304432153701782, "learning_rate": 1.2466134113940264e-07, "loss": 0.7348, "step": 26355 }, { "epoch": 0.9309414585255796, "grad_norm": 1.835507869720459, "learning_rate": 1.2453443978727387e-07, "loss": 0.7712, "step": 26356 }, { "epoch": 0.9309767803292875, "grad_norm": 1.6750168800354004, "learning_rate": 1.2440760224432247e-07, "loss": 0.7416, "step": 26357 }, { "epoch": 0.9310121021329955, "grad_norm": 1.5927929878234863, "learning_rate": 1.2428082851220713e-07, "loss": 0.7253, "step": 26358 }, { "epoch": 0.9310474239367034, "grad_norm": 2.0702388286590576, "learning_rate": 1.2415411859258819e-07, "loss": 0.7189, "step": 26359 }, { "epoch": 0.9310827457404113, "grad_norm": 1.7593305110931396, "learning_rate": 1.2402747248712377e-07, "loss": 0.7497, "step": 26360 }, { "epoch": 0.9311180675441192, "grad_norm": 2.085296154022217, "learning_rate": 1.2390089019747087e-07, "loss": 0.7827, "step": 26361 }, { "epoch": 0.9311533893478271, "grad_norm": 1.7941659688949585, "learning_rate": 1.237743717252865e-07, "loss": 0.8034, "step": 26362 }, { "epoch": 0.9311887111515349, "grad_norm": 1.6816784143447876, "learning_rate": 1.2364791707222712e-07, "loss": 0.7905, "step": 26363 }, { "epoch": 0.9312240329552428, "grad_norm": 2.1341309547424316, "learning_rate": 1.2352152623994695e-07, "loss": 0.8003, "step": 26364 }, { "epoch": 0.9312593547589507, "grad_norm": 1.6658744812011719, "learning_rate": 1.2339519923010024e-07, "loss": 0.7417, "step": 26365 }, { "epoch": 0.9312946765626586, "grad_norm": 1.7154853343963623, "learning_rate": 1.2326893604434008e-07, "loss": 0.7293, "step": 26366 }, { "epoch": 0.9313299983663665, "grad_norm": 1.645447015762329, "learning_rate": 1.2314273668431964e-07, "loss": 0.7054, "step": 26367 }, { "epoch": 0.9313653201700745, "grad_norm": 1.7546182870864868, "learning_rate": 1.2301660115169034e-07, "loss": 0.7599, "step": 26368 }, { "epoch": 0.9314006419737824, "grad_norm": 1.6876256465911865, "learning_rate": 1.2289052944810253e-07, "loss": 0.772, "step": 26369 }, { "epoch": 0.9314359637774903, "grad_norm": 1.7643518447875977, "learning_rate": 1.227645215752077e-07, "loss": 0.7548, "step": 26370 }, { "epoch": 0.9314712855811982, "grad_norm": 1.5922949314117432, "learning_rate": 1.2263857753465336e-07, "loss": 0.7598, "step": 26371 }, { "epoch": 0.9315066073849061, "grad_norm": 1.6169968843460083, "learning_rate": 1.2251269732808823e-07, "loss": 0.7629, "step": 26372 }, { "epoch": 0.931541929188614, "grad_norm": 1.749993920326233, "learning_rate": 1.2238688095716045e-07, "loss": 0.7415, "step": 26373 }, { "epoch": 0.9315772509923219, "grad_norm": 1.7502057552337646, "learning_rate": 1.222611284235159e-07, "loss": 0.7741, "step": 26374 }, { "epoch": 0.9316125727960298, "grad_norm": 1.8372446298599243, "learning_rate": 1.2213543972880105e-07, "loss": 0.7568, "step": 26375 }, { "epoch": 0.9316478945997377, "grad_norm": 1.5480810403823853, "learning_rate": 1.2200981487466012e-07, "loss": 0.7264, "step": 26376 }, { "epoch": 0.9316832164034456, "grad_norm": 2.950512170791626, "learning_rate": 1.2188425386273905e-07, "loss": 0.7224, "step": 26377 }, { "epoch": 0.9317185382071536, "grad_norm": 2.034475564956665, "learning_rate": 1.2175875669467873e-07, "loss": 0.7751, "step": 26378 }, { "epoch": 0.9317538600108615, "grad_norm": 1.766184687614441, "learning_rate": 1.2163332337212285e-07, "loss": 0.719, "step": 26379 }, { "epoch": 0.9317891818145694, "grad_norm": 1.734394907951355, "learning_rate": 1.21507953896714e-07, "loss": 0.7804, "step": 26380 }, { "epoch": 0.9318245036182773, "grad_norm": 1.6859642267227173, "learning_rate": 1.2138264827009082e-07, "loss": 0.7684, "step": 26381 }, { "epoch": 0.9318598254219852, "grad_norm": 1.9363465309143066, "learning_rate": 1.2125740649389538e-07, "loss": 0.7754, "step": 26382 }, { "epoch": 0.9318951472256931, "grad_norm": 1.7866740226745605, "learning_rate": 1.211322285697658e-07, "loss": 0.7574, "step": 26383 }, { "epoch": 0.931930469029401, "grad_norm": 1.6623237133026123, "learning_rate": 1.2100711449933967e-07, "loss": 0.7386, "step": 26384 }, { "epoch": 0.9319657908331089, "grad_norm": 1.7946914434432983, "learning_rate": 1.2088206428425563e-07, "loss": 0.7368, "step": 26385 }, { "epoch": 0.9320011126368168, "grad_norm": 2.0388660430908203, "learning_rate": 1.207570779261502e-07, "loss": 0.7638, "step": 26386 }, { "epoch": 0.9320364344405248, "grad_norm": 1.7778929471969604, "learning_rate": 1.2063215542665928e-07, "loss": 0.7481, "step": 26387 }, { "epoch": 0.9320717562442327, "grad_norm": 1.7511065006256104, "learning_rate": 1.2050729678741657e-07, "loss": 0.7618, "step": 26388 }, { "epoch": 0.9321070780479405, "grad_norm": 1.688236951828003, "learning_rate": 1.2038250201005742e-07, "loss": 0.7694, "step": 26389 }, { "epoch": 0.9321423998516484, "grad_norm": 1.7844499349594116, "learning_rate": 1.202577710962155e-07, "loss": 0.775, "step": 26390 }, { "epoch": 0.9321777216553563, "grad_norm": 1.7388066053390503, "learning_rate": 1.2013310404752177e-07, "loss": 0.7616, "step": 26391 }, { "epoch": 0.9322130434590642, "grad_norm": 2.2663753032684326, "learning_rate": 1.200085008656088e-07, "loss": 0.7461, "step": 26392 }, { "epoch": 0.9322483652627721, "grad_norm": 1.756639838218689, "learning_rate": 1.1988396155210746e-07, "loss": 0.7658, "step": 26393 }, { "epoch": 0.93228368706648, "grad_norm": 1.6224372386932373, "learning_rate": 1.1975948610864706e-07, "loss": 0.7565, "step": 26394 }, { "epoch": 0.9323190088701879, "grad_norm": 1.9141626358032227, "learning_rate": 1.196350745368574e-07, "loss": 0.7882, "step": 26395 }, { "epoch": 0.9323543306738958, "grad_norm": 1.6436330080032349, "learning_rate": 1.1951072683836717e-07, "loss": 0.7539, "step": 26396 }, { "epoch": 0.9323896524776037, "grad_norm": 1.5000416040420532, "learning_rate": 1.193864430148023e-07, "loss": 0.7342, "step": 26397 }, { "epoch": 0.9324249742813117, "grad_norm": 1.7806075811386108, "learning_rate": 1.1926222306778978e-07, "loss": 0.753, "step": 26398 }, { "epoch": 0.9324602960850196, "grad_norm": 2.2764511108398438, "learning_rate": 1.1913806699895669e-07, "loss": 0.7914, "step": 26399 }, { "epoch": 0.9324956178887275, "grad_norm": 1.540130615234375, "learning_rate": 1.1901397480992727e-07, "loss": 0.7714, "step": 26400 }, { "epoch": 0.9325309396924354, "grad_norm": 1.663462519645691, "learning_rate": 1.1888994650232466e-07, "loss": 0.7192, "step": 26401 }, { "epoch": 0.9325662614961433, "grad_norm": 1.731437087059021, "learning_rate": 1.1876598207777313e-07, "loss": 0.7554, "step": 26402 }, { "epoch": 0.9326015832998512, "grad_norm": 1.7377773523330688, "learning_rate": 1.1864208153789525e-07, "loss": 0.761, "step": 26403 }, { "epoch": 0.9326369051035591, "grad_norm": 1.644698143005371, "learning_rate": 1.1851824488431197e-07, "loss": 0.7617, "step": 26404 }, { "epoch": 0.932672226907267, "grad_norm": 1.7397613525390625, "learning_rate": 1.1839447211864363e-07, "loss": 0.7299, "step": 26405 }, { "epoch": 0.9327075487109749, "grad_norm": 1.7171170711517334, "learning_rate": 1.1827076324251174e-07, "loss": 0.8031, "step": 26406 }, { "epoch": 0.9327428705146829, "grad_norm": 1.7993358373641968, "learning_rate": 1.1814711825753389e-07, "loss": 0.7916, "step": 26407 }, { "epoch": 0.9327781923183908, "grad_norm": 1.6195409297943115, "learning_rate": 1.1802353716532877e-07, "loss": 0.7664, "step": 26408 }, { "epoch": 0.9328135141220987, "grad_norm": 1.722015142440796, "learning_rate": 1.1790001996751455e-07, "loss": 0.7533, "step": 26409 }, { "epoch": 0.9328488359258066, "grad_norm": 1.8488614559173584, "learning_rate": 1.1777656666570658e-07, "loss": 0.7644, "step": 26410 }, { "epoch": 0.9328841577295145, "grad_norm": 1.5230324268341064, "learning_rate": 1.1765317726152081e-07, "loss": 0.7545, "step": 26411 }, { "epoch": 0.9329194795332224, "grad_norm": 1.8766437768936157, "learning_rate": 1.1752985175657316e-07, "loss": 0.7611, "step": 26412 }, { "epoch": 0.9329548013369303, "grad_norm": 1.75705087184906, "learning_rate": 1.1740659015247679e-07, "loss": 0.784, "step": 26413 }, { "epoch": 0.9329901231406382, "grad_norm": 1.7125262022018433, "learning_rate": 1.1728339245084485e-07, "loss": 0.7879, "step": 26414 }, { "epoch": 0.9330254449443461, "grad_norm": 0.9104928374290466, "learning_rate": 1.171602586532905e-07, "loss": 0.546, "step": 26415 }, { "epoch": 0.9330607667480539, "grad_norm": 1.5894973278045654, "learning_rate": 1.1703718876142468e-07, "loss": 0.7189, "step": 26416 }, { "epoch": 0.9330960885517618, "grad_norm": 1.7287545204162598, "learning_rate": 1.1691418277685829e-07, "loss": 0.7413, "step": 26417 }, { "epoch": 0.9331314103554698, "grad_norm": 1.7058497667312622, "learning_rate": 1.1679124070120118e-07, "loss": 0.7474, "step": 26418 }, { "epoch": 0.9331667321591777, "grad_norm": 1.8800082206726074, "learning_rate": 1.1666836253606261e-07, "loss": 0.7666, "step": 26419 }, { "epoch": 0.9332020539628856, "grad_norm": 1.9407670497894287, "learning_rate": 1.1654554828304964e-07, "loss": 0.7462, "step": 26420 }, { "epoch": 0.9332373757665935, "grad_norm": 1.812044382095337, "learning_rate": 1.1642279794377098e-07, "loss": 0.7112, "step": 26421 }, { "epoch": 0.9332726975703014, "grad_norm": 2.4070727825164795, "learning_rate": 1.1630011151983311e-07, "loss": 0.7582, "step": 26422 }, { "epoch": 0.9333080193740093, "grad_norm": 1.812331199645996, "learning_rate": 1.1617748901284087e-07, "loss": 0.7813, "step": 26423 }, { "epoch": 0.9333433411777172, "grad_norm": 0.9578467011451721, "learning_rate": 1.1605493042439964e-07, "loss": 0.5899, "step": 26424 }, { "epoch": 0.9333786629814251, "grad_norm": 14.467488288879395, "learning_rate": 1.1593243575611369e-07, "loss": 0.777, "step": 26425 }, { "epoch": 0.933413984785133, "grad_norm": 1.7190945148468018, "learning_rate": 1.1581000500958561e-07, "loss": 0.7612, "step": 26426 }, { "epoch": 0.933449306588841, "grad_norm": 1.6103264093399048, "learning_rate": 1.1568763818641804e-07, "loss": 0.7855, "step": 26427 }, { "epoch": 0.9334846283925489, "grad_norm": 1.6340484619140625, "learning_rate": 1.15565335288213e-07, "loss": 0.7513, "step": 26428 }, { "epoch": 0.9335199501962568, "grad_norm": 1.6164394617080688, "learning_rate": 1.1544309631657036e-07, "loss": 0.7413, "step": 26429 }, { "epoch": 0.9335552719999647, "grad_norm": 1.8173974752426147, "learning_rate": 1.1532092127308992e-07, "loss": 0.7786, "step": 26430 }, { "epoch": 0.9335905938036726, "grad_norm": 1.6392180919647217, "learning_rate": 1.1519881015937207e-07, "loss": 0.7829, "step": 26431 }, { "epoch": 0.9336259156073805, "grad_norm": 1.6641173362731934, "learning_rate": 1.1507676297701276e-07, "loss": 0.7605, "step": 26432 }, { "epoch": 0.9336612374110884, "grad_norm": 1.8824756145477295, "learning_rate": 1.1495477972761126e-07, "loss": 0.8281, "step": 26433 }, { "epoch": 0.9336965592147963, "grad_norm": 1.9416043758392334, "learning_rate": 1.1483286041276298e-07, "loss": 0.7366, "step": 26434 }, { "epoch": 0.9337318810185042, "grad_norm": 2.6023693084716797, "learning_rate": 1.1471100503406385e-07, "loss": 0.75, "step": 26435 }, { "epoch": 0.9337672028222122, "grad_norm": 1.7660382986068726, "learning_rate": 1.1458921359310871e-07, "loss": 0.761, "step": 26436 }, { "epoch": 0.9338025246259201, "grad_norm": 1.7844470739364624, "learning_rate": 1.1446748609149239e-07, "loss": 0.7336, "step": 26437 }, { "epoch": 0.933837846429628, "grad_norm": 1.758198618888855, "learning_rate": 1.1434582253080695e-07, "loss": 0.7355, "step": 26438 }, { "epoch": 0.9338731682333359, "grad_norm": 1.9007552862167358, "learning_rate": 1.1422422291264445e-07, "loss": 0.7738, "step": 26439 }, { "epoch": 0.9339084900370438, "grad_norm": 1.8210912942886353, "learning_rate": 1.1410268723859697e-07, "loss": 0.7132, "step": 26440 }, { "epoch": 0.9339438118407517, "grad_norm": 1.7632110118865967, "learning_rate": 1.1398121551025543e-07, "loss": 0.7471, "step": 26441 }, { "epoch": 0.9339791336444595, "grad_norm": 1.7747631072998047, "learning_rate": 1.1385980772920913e-07, "loss": 0.7779, "step": 26442 }, { "epoch": 0.9340144554481674, "grad_norm": 1.7626367807388306, "learning_rate": 1.137384638970468e-07, "loss": 0.7316, "step": 26443 }, { "epoch": 0.9340497772518753, "grad_norm": 1.5802415609359741, "learning_rate": 1.1361718401535715e-07, "loss": 0.7639, "step": 26444 }, { "epoch": 0.9340850990555832, "grad_norm": 1.7134262323379517, "learning_rate": 1.1349596808572782e-07, "loss": 0.7621, "step": 26445 }, { "epoch": 0.9341204208592911, "grad_norm": 1.7130508422851562, "learning_rate": 1.1337481610974366e-07, "loss": 0.7753, "step": 26446 }, { "epoch": 0.9341557426629991, "grad_norm": 1.6769435405731201, "learning_rate": 1.1325372808899171e-07, "loss": 0.7871, "step": 26447 }, { "epoch": 0.934191064466707, "grad_norm": 1.8098598718643188, "learning_rate": 1.1313270402505683e-07, "loss": 0.767, "step": 26448 }, { "epoch": 0.9342263862704149, "grad_norm": 1.7517611980438232, "learning_rate": 1.1301174391952219e-07, "loss": 0.8031, "step": 26449 }, { "epoch": 0.9342617080741228, "grad_norm": 1.842131495475769, "learning_rate": 1.1289084777397097e-07, "loss": 0.7933, "step": 26450 }, { "epoch": 0.9342970298778307, "grad_norm": 1.6992758512496948, "learning_rate": 1.1277001558998635e-07, "loss": 0.8026, "step": 26451 }, { "epoch": 0.9343323516815386, "grad_norm": 1.73737370967865, "learning_rate": 1.1264924736914817e-07, "loss": 0.7433, "step": 26452 }, { "epoch": 0.9343676734852465, "grad_norm": 1.9451955556869507, "learning_rate": 1.1252854311303852e-07, "loss": 0.7639, "step": 26453 }, { "epoch": 0.9344029952889544, "grad_norm": 1.5852537155151367, "learning_rate": 1.1240790282323665e-07, "loss": 0.7484, "step": 26454 }, { "epoch": 0.9344383170926623, "grad_norm": 2.3220300674438477, "learning_rate": 1.1228732650132079e-07, "loss": 0.7531, "step": 26455 }, { "epoch": 0.9344736388963703, "grad_norm": 1.543940544128418, "learning_rate": 1.1216681414887021e-07, "loss": 0.7641, "step": 26456 }, { "epoch": 0.9345089607000782, "grad_norm": 1.6237578392028809, "learning_rate": 1.1204636576746087e-07, "loss": 0.7937, "step": 26457 }, { "epoch": 0.9345442825037861, "grad_norm": 1.5938189029693604, "learning_rate": 1.1192598135867094e-07, "loss": 0.768, "step": 26458 }, { "epoch": 0.934579604307494, "grad_norm": 1.95473051071167, "learning_rate": 1.1180566092407419e-07, "loss": 0.7423, "step": 26459 }, { "epoch": 0.9346149261112019, "grad_norm": 1.7474626302719116, "learning_rate": 1.1168540446524545e-07, "loss": 0.7357, "step": 26460 }, { "epoch": 0.9346502479149098, "grad_norm": 1.6020050048828125, "learning_rate": 1.1156521198376069e-07, "loss": 0.7653, "step": 26461 }, { "epoch": 0.9346855697186177, "grad_norm": 1.5914714336395264, "learning_rate": 1.1144508348119032e-07, "loss": 0.7668, "step": 26462 }, { "epoch": 0.9347208915223256, "grad_norm": 1.6351958513259888, "learning_rate": 1.1132501895910808e-07, "loss": 0.7535, "step": 26463 }, { "epoch": 0.9347562133260335, "grad_norm": 1.7348014116287231, "learning_rate": 1.1120501841908548e-07, "loss": 0.7596, "step": 26464 }, { "epoch": 0.9347915351297414, "grad_norm": 1.590509057044983, "learning_rate": 1.1108508186269184e-07, "loss": 0.7635, "step": 26465 }, { "epoch": 0.9348268569334494, "grad_norm": 1.724654197692871, "learning_rate": 1.1096520929149812e-07, "loss": 0.7924, "step": 26466 }, { "epoch": 0.9348621787371573, "grad_norm": 1.910102128982544, "learning_rate": 1.108454007070725e-07, "loss": 0.7797, "step": 26467 }, { "epoch": 0.9348975005408651, "grad_norm": 1.703758716583252, "learning_rate": 1.1072565611098318e-07, "loss": 0.7297, "step": 26468 }, { "epoch": 0.934932822344573, "grad_norm": 1.5474753379821777, "learning_rate": 1.1060597550479723e-07, "loss": 0.7622, "step": 26469 }, { "epoch": 0.9349681441482809, "grad_norm": 1.5880122184753418, "learning_rate": 1.1048635889008174e-07, "loss": 0.7233, "step": 26470 }, { "epoch": 0.9350034659519888, "grad_norm": 1.7505104541778564, "learning_rate": 1.1036680626840158e-07, "loss": 0.7616, "step": 26471 }, { "epoch": 0.9350387877556967, "grad_norm": 4.393076419830322, "learning_rate": 1.1024731764132102e-07, "loss": 0.7469, "step": 26472 }, { "epoch": 0.9350741095594046, "grad_norm": 1.9102004766464233, "learning_rate": 1.1012789301040494e-07, "loss": 0.7706, "step": 26473 }, { "epoch": 0.9351094313631125, "grad_norm": 1.8999006748199463, "learning_rate": 1.1000853237721598e-07, "loss": 0.7712, "step": 26474 }, { "epoch": 0.9351447531668204, "grad_norm": 1.6626064777374268, "learning_rate": 1.0988923574331567e-07, "loss": 0.7494, "step": 26475 }, { "epoch": 0.9351800749705284, "grad_norm": 2.0401976108551025, "learning_rate": 1.0977000311026554e-07, "loss": 0.7647, "step": 26476 }, { "epoch": 0.9352153967742363, "grad_norm": 1.7060115337371826, "learning_rate": 1.0965083447962654e-07, "loss": 0.7691, "step": 26477 }, { "epoch": 0.9352507185779442, "grad_norm": 1.653181552886963, "learning_rate": 1.0953172985295801e-07, "loss": 0.747, "step": 26478 }, { "epoch": 0.9352860403816521, "grad_norm": 1.5688027143478394, "learning_rate": 1.0941268923181924e-07, "loss": 0.7441, "step": 26479 }, { "epoch": 0.93532136218536, "grad_norm": 1.7924689054489136, "learning_rate": 1.0929371261776789e-07, "loss": 0.8018, "step": 26480 }, { "epoch": 0.9353566839890679, "grad_norm": 1.9404661655426025, "learning_rate": 1.0917480001236103e-07, "loss": 0.7431, "step": 26481 }, { "epoch": 0.9353920057927758, "grad_norm": 1.6932185888290405, "learning_rate": 1.0905595141715464e-07, "loss": 0.7737, "step": 26482 }, { "epoch": 0.9354273275964837, "grad_norm": 1.6913330554962158, "learning_rate": 1.0893716683370525e-07, "loss": 0.7177, "step": 26483 }, { "epoch": 0.9354626494001916, "grad_norm": 2.683439016342163, "learning_rate": 1.0881844626356609e-07, "loss": 0.7739, "step": 26484 }, { "epoch": 0.9354979712038995, "grad_norm": 1.6654045581817627, "learning_rate": 1.0869978970829143e-07, "loss": 0.7738, "step": 26485 }, { "epoch": 0.9355332930076075, "grad_norm": 1.9349735975265503, "learning_rate": 1.085811971694356e-07, "loss": 0.7377, "step": 26486 }, { "epoch": 0.9355686148113154, "grad_norm": 1.6825567483901978, "learning_rate": 1.0846266864854848e-07, "loss": 0.7314, "step": 26487 }, { "epoch": 0.9356039366150233, "grad_norm": 1.675044059753418, "learning_rate": 1.0834420414718271e-07, "loss": 0.7698, "step": 26488 }, { "epoch": 0.9356392584187312, "grad_norm": 1.580875277519226, "learning_rate": 1.0822580366688928e-07, "loss": 0.7785, "step": 26489 }, { "epoch": 0.9356745802224391, "grad_norm": 1.558316707611084, "learning_rate": 1.0810746720921583e-07, "loss": 0.7526, "step": 26490 }, { "epoch": 0.935709902026147, "grad_norm": 1.6434597969055176, "learning_rate": 1.0798919477571279e-07, "loss": 0.7649, "step": 26491 }, { "epoch": 0.9357452238298549, "grad_norm": 1.7278556823730469, "learning_rate": 1.0787098636792781e-07, "loss": 0.7616, "step": 26492 }, { "epoch": 0.9357805456335628, "grad_norm": 0.955325722694397, "learning_rate": 1.0775284198740799e-07, "loss": 0.5787, "step": 26493 }, { "epoch": 0.9358158674372706, "grad_norm": 1.6727476119995117, "learning_rate": 1.0763476163569875e-07, "loss": 0.7253, "step": 26494 }, { "epoch": 0.9358511892409785, "grad_norm": 1.6089268922805786, "learning_rate": 1.0751674531434609e-07, "loss": 0.7465, "step": 26495 }, { "epoch": 0.9358865110446865, "grad_norm": 1.5439903736114502, "learning_rate": 1.0739879302489487e-07, "loss": 0.7403, "step": 26496 }, { "epoch": 0.9359218328483944, "grad_norm": 1.707919716835022, "learning_rate": 1.0728090476888831e-07, "loss": 0.7609, "step": 26497 }, { "epoch": 0.9359571546521023, "grad_norm": 1.684189796447754, "learning_rate": 1.0716308054786905e-07, "loss": 0.7526, "step": 26498 }, { "epoch": 0.9359924764558102, "grad_norm": 1.707319736480713, "learning_rate": 1.0704532036338089e-07, "loss": 0.778, "step": 26499 }, { "epoch": 0.9360277982595181, "grad_norm": 1.6433466672897339, "learning_rate": 1.0692762421696257e-07, "loss": 0.7427, "step": 26500 }, { "epoch": 0.936063120063226, "grad_norm": 1.6802798509597778, "learning_rate": 1.068099921101562e-07, "loss": 0.768, "step": 26501 }, { "epoch": 0.9360984418669339, "grad_norm": 1.7752699851989746, "learning_rate": 1.0669242404450053e-07, "loss": 0.7762, "step": 26502 }, { "epoch": 0.9361337636706418, "grad_norm": 1.5857549905776978, "learning_rate": 1.0657492002153547e-07, "loss": 0.7485, "step": 26503 }, { "epoch": 0.9361690854743497, "grad_norm": 1.6664211750030518, "learning_rate": 1.06457480042797e-07, "loss": 0.7597, "step": 26504 }, { "epoch": 0.9362044072780576, "grad_norm": 1.824542760848999, "learning_rate": 1.0634010410982332e-07, "loss": 0.7263, "step": 26505 }, { "epoch": 0.9362397290817656, "grad_norm": 1.6802114248275757, "learning_rate": 1.0622279222415099e-07, "loss": 0.738, "step": 26506 }, { "epoch": 0.9362750508854735, "grad_norm": 1.7363401651382446, "learning_rate": 1.061055443873149e-07, "loss": 0.7832, "step": 26507 }, { "epoch": 0.9363103726891814, "grad_norm": 1.7044399976730347, "learning_rate": 1.0598836060084882e-07, "loss": 0.7687, "step": 26508 }, { "epoch": 0.9363456944928893, "grad_norm": 1.7456629276275635, "learning_rate": 1.0587124086628764e-07, "loss": 0.7789, "step": 26509 }, { "epoch": 0.9363810162965972, "grad_norm": 1.6271028518676758, "learning_rate": 1.0575418518516345e-07, "loss": 0.7527, "step": 26510 }, { "epoch": 0.9364163381003051, "grad_norm": 1.8240748643875122, "learning_rate": 1.0563719355900836e-07, "loss": 0.8188, "step": 26511 }, { "epoch": 0.936451659904013, "grad_norm": 1.8975685834884644, "learning_rate": 1.0552026598935394e-07, "loss": 0.7565, "step": 26512 }, { "epoch": 0.9364869817077209, "grad_norm": 1.625806212425232, "learning_rate": 1.0540340247773006e-07, "loss": 0.7445, "step": 26513 }, { "epoch": 0.9365223035114288, "grad_norm": 1.7412588596343994, "learning_rate": 1.0528660302566606e-07, "loss": 0.7723, "step": 26514 }, { "epoch": 0.9365576253151368, "grad_norm": 1.6329463720321655, "learning_rate": 1.0516986763469128e-07, "loss": 0.7707, "step": 26515 }, { "epoch": 0.9365929471188447, "grad_norm": 2.2238986492156982, "learning_rate": 1.0505319630633337e-07, "loss": 0.75, "step": 26516 }, { "epoch": 0.9366282689225526, "grad_norm": 1.7401010990142822, "learning_rate": 1.0493658904211834e-07, "loss": 0.7932, "step": 26517 }, { "epoch": 0.9366635907262605, "grad_norm": 1.677119493484497, "learning_rate": 1.0482004584357331e-07, "loss": 0.7565, "step": 26518 }, { "epoch": 0.9366989125299684, "grad_norm": 1.6958948373794556, "learning_rate": 1.047035667122237e-07, "loss": 0.8014, "step": 26519 }, { "epoch": 0.9367342343336762, "grad_norm": 1.5368571281433105, "learning_rate": 1.0458715164959333e-07, "loss": 0.732, "step": 26520 }, { "epoch": 0.9367695561373841, "grad_norm": 1.643744945526123, "learning_rate": 1.0447080065720594e-07, "loss": 0.7848, "step": 26521 }, { "epoch": 0.936804877941092, "grad_norm": 1.5825364589691162, "learning_rate": 1.0435451373658478e-07, "loss": 0.7479, "step": 26522 }, { "epoch": 0.9368401997447999, "grad_norm": 3.4767725467681885, "learning_rate": 1.0423829088925086e-07, "loss": 0.7974, "step": 26523 }, { "epoch": 0.9368755215485078, "grad_norm": 1.6661174297332764, "learning_rate": 1.0412213211672629e-07, "loss": 0.8133, "step": 26524 }, { "epoch": 0.9369108433522157, "grad_norm": 2.7459044456481934, "learning_rate": 1.040060374205315e-07, "loss": 0.7658, "step": 26525 }, { "epoch": 0.9369461651559237, "grad_norm": 1.7097545862197876, "learning_rate": 1.0389000680218474e-07, "loss": 0.7506, "step": 26526 }, { "epoch": 0.9369814869596316, "grad_norm": 1.7901148796081543, "learning_rate": 1.0377404026320481e-07, "loss": 0.7734, "step": 26527 }, { "epoch": 0.9370168087633395, "grad_norm": 2.0584399700164795, "learning_rate": 1.0365813780511048e-07, "loss": 0.7561, "step": 26528 }, { "epoch": 0.9370521305670474, "grad_norm": 1.6886667013168335, "learning_rate": 1.035422994294183e-07, "loss": 0.7645, "step": 26529 }, { "epoch": 0.9370874523707553, "grad_norm": 0.8797840476036072, "learning_rate": 1.0342652513764373e-07, "loss": 0.5485, "step": 26530 }, { "epoch": 0.9371227741744632, "grad_norm": 1.674506664276123, "learning_rate": 1.0331081493130168e-07, "loss": 0.7743, "step": 26531 }, { "epoch": 0.9371580959781711, "grad_norm": 1.778796672821045, "learning_rate": 1.0319516881190816e-07, "loss": 0.7445, "step": 26532 }, { "epoch": 0.937193417781879, "grad_norm": 1.964812994003296, "learning_rate": 1.030795867809753e-07, "loss": 0.792, "step": 26533 }, { "epoch": 0.937228739585587, "grad_norm": 1.5405502319335938, "learning_rate": 1.029640688400163e-07, "loss": 0.7559, "step": 26534 }, { "epoch": 0.9372640613892949, "grad_norm": 2.3097188472747803, "learning_rate": 1.0284861499054333e-07, "loss": 0.7908, "step": 26535 }, { "epoch": 0.9372993831930028, "grad_norm": 1.8261069059371948, "learning_rate": 1.0273322523406681e-07, "loss": 0.7523, "step": 26536 }, { "epoch": 0.9373347049967107, "grad_norm": 1.5929698944091797, "learning_rate": 1.0261789957209723e-07, "loss": 0.7367, "step": 26537 }, { "epoch": 0.9373700268004186, "grad_norm": 3.0448126792907715, "learning_rate": 1.0250263800614445e-07, "loss": 0.7329, "step": 26538 }, { "epoch": 0.9374053486041265, "grad_norm": 1.7589815855026245, "learning_rate": 1.0238744053771565e-07, "loss": 0.7727, "step": 26539 }, { "epoch": 0.9374406704078344, "grad_norm": 1.6811081171035767, "learning_rate": 1.0227230716832016e-07, "loss": 0.7731, "step": 26540 }, { "epoch": 0.9374759922115423, "grad_norm": 1.7654677629470825, "learning_rate": 1.0215723789946396e-07, "loss": 0.7295, "step": 26541 }, { "epoch": 0.9375113140152502, "grad_norm": 1.734816312789917, "learning_rate": 1.0204223273265257e-07, "loss": 0.751, "step": 26542 }, { "epoch": 0.9375466358189581, "grad_norm": 1.715274453163147, "learning_rate": 1.0192729166939197e-07, "loss": 0.7598, "step": 26543 }, { "epoch": 0.937581957622666, "grad_norm": 1.6602566242218018, "learning_rate": 1.0181241471118652e-07, "loss": 0.7248, "step": 26544 }, { "epoch": 0.937617279426374, "grad_norm": 1.8287729024887085, "learning_rate": 1.0169760185953948e-07, "loss": 0.7291, "step": 26545 }, { "epoch": 0.9376526012300818, "grad_norm": 1.7548801898956299, "learning_rate": 1.0158285311595295e-07, "loss": 0.7564, "step": 26546 }, { "epoch": 0.9376879230337897, "grad_norm": 1.7971271276474, "learning_rate": 1.0146816848192965e-07, "loss": 0.7, "step": 26547 }, { "epoch": 0.9377232448374976, "grad_norm": 1.770279049873352, "learning_rate": 1.0135354795897057e-07, "loss": 0.7785, "step": 26548 }, { "epoch": 0.9377585666412055, "grad_norm": 1.8055379390716553, "learning_rate": 1.0123899154857453e-07, "loss": 0.7819, "step": 26549 }, { "epoch": 0.9377938884449134, "grad_norm": 1.9347081184387207, "learning_rate": 1.0112449925224199e-07, "loss": 0.7677, "step": 26550 }, { "epoch": 0.9378292102486213, "grad_norm": 1.6937642097473145, "learning_rate": 1.010100710714712e-07, "loss": 0.7733, "step": 26551 }, { "epoch": 0.9378645320523292, "grad_norm": 0.9489094018936157, "learning_rate": 1.008957070077593e-07, "loss": 0.5447, "step": 26552 }, { "epoch": 0.9378998538560371, "grad_norm": 1.6971869468688965, "learning_rate": 1.0078140706260398e-07, "loss": 0.7688, "step": 26553 }, { "epoch": 0.937935175659745, "grad_norm": 1.644531011581421, "learning_rate": 1.0066717123750069e-07, "loss": 0.7527, "step": 26554 }, { "epoch": 0.937970497463453, "grad_norm": 1.710103988647461, "learning_rate": 1.0055299953394381e-07, "loss": 0.7787, "step": 26555 }, { "epoch": 0.9380058192671609, "grad_norm": 1.8411444425582886, "learning_rate": 1.0043889195342881e-07, "loss": 0.7338, "step": 26556 }, { "epoch": 0.9380411410708688, "grad_norm": 1.6254123449325562, "learning_rate": 1.0032484849744895e-07, "loss": 0.7908, "step": 26557 }, { "epoch": 0.9380764628745767, "grad_norm": 1.7641507387161255, "learning_rate": 1.0021086916749579e-07, "loss": 0.7764, "step": 26558 }, { "epoch": 0.9381117846782846, "grad_norm": 1.6395890712738037, "learning_rate": 1.0009695396506147e-07, "loss": 0.7521, "step": 26559 }, { "epoch": 0.9381471064819925, "grad_norm": 1.6163874864578247, "learning_rate": 9.99831028916376e-08, "loss": 0.7624, "step": 26560 }, { "epoch": 0.9381824282857004, "grad_norm": 1.5636810064315796, "learning_rate": 9.986931594871407e-08, "loss": 0.7846, "step": 26561 }, { "epoch": 0.9382177500894083, "grad_norm": 1.6613683700561523, "learning_rate": 9.975559313777916e-08, "loss": 0.7528, "step": 26562 }, { "epoch": 0.9382530718931162, "grad_norm": 1.5777920484542847, "learning_rate": 9.964193446032222e-08, "loss": 0.7525, "step": 26563 }, { "epoch": 0.9382883936968242, "grad_norm": 1.531122088432312, "learning_rate": 9.95283399178304e-08, "loss": 0.729, "step": 26564 }, { "epoch": 0.9383237155005321, "grad_norm": 1.6973007917404175, "learning_rate": 9.941480951179084e-08, "loss": 0.7898, "step": 26565 }, { "epoch": 0.93835903730424, "grad_norm": 1.6987214088439941, "learning_rate": 9.930134324368901e-08, "loss": 0.7457, "step": 26566 }, { "epoch": 0.9383943591079479, "grad_norm": 1.8369081020355225, "learning_rate": 9.918794111500984e-08, "loss": 0.755, "step": 26567 }, { "epoch": 0.9384296809116558, "grad_norm": 1.6710747480392456, "learning_rate": 9.907460312723715e-08, "loss": 0.7612, "step": 26568 }, { "epoch": 0.9384650027153637, "grad_norm": 1.8298118114471436, "learning_rate": 9.89613292818553e-08, "loss": 0.791, "step": 26569 }, { "epoch": 0.9385003245190716, "grad_norm": 2.2658252716064453, "learning_rate": 9.88481195803459e-08, "loss": 0.7511, "step": 26570 }, { "epoch": 0.9385356463227795, "grad_norm": 2.8177623748779297, "learning_rate": 9.873497402419163e-08, "loss": 0.7603, "step": 26571 }, { "epoch": 0.9385709681264873, "grad_norm": 1.6182372570037842, "learning_rate": 9.862189261487187e-08, "loss": 0.7532, "step": 26572 }, { "epoch": 0.9386062899301952, "grad_norm": 1.7738269567489624, "learning_rate": 9.850887535386766e-08, "loss": 0.7716, "step": 26573 }, { "epoch": 0.9386416117339031, "grad_norm": 1.48422372341156, "learning_rate": 9.839592224265837e-08, "loss": 0.7637, "step": 26574 }, { "epoch": 0.9386769335376111, "grad_norm": 1.5371720790863037, "learning_rate": 9.828303328272171e-08, "loss": 0.7491, "step": 26575 }, { "epoch": 0.938712255341319, "grad_norm": 1.635496973991394, "learning_rate": 9.817020847553482e-08, "loss": 0.7728, "step": 26576 }, { "epoch": 0.9387475771450269, "grad_norm": 1.9694979190826416, "learning_rate": 9.805744782257542e-08, "loss": 0.7938, "step": 26577 }, { "epoch": 0.9387828989487348, "grad_norm": 1.6541991233825684, "learning_rate": 9.794475132531844e-08, "loss": 0.7784, "step": 26578 }, { "epoch": 0.9388182207524427, "grad_norm": 1.8339577913284302, "learning_rate": 9.783211898523882e-08, "loss": 0.7602, "step": 26579 }, { "epoch": 0.9388535425561506, "grad_norm": 1.6582881212234497, "learning_rate": 9.771955080381146e-08, "loss": 0.7336, "step": 26580 }, { "epoch": 0.9388888643598585, "grad_norm": 1.7636876106262207, "learning_rate": 9.760704678250853e-08, "loss": 0.7654, "step": 26581 }, { "epoch": 0.9389241861635664, "grad_norm": 1.9312217235565186, "learning_rate": 9.749460692280277e-08, "loss": 0.7678, "step": 26582 }, { "epoch": 0.9389595079672743, "grad_norm": 2.012697458267212, "learning_rate": 9.738223122616686e-08, "loss": 0.7561, "step": 26583 }, { "epoch": 0.9389948297709823, "grad_norm": 1.681348204612732, "learning_rate": 9.726991969407019e-08, "loss": 0.7822, "step": 26584 }, { "epoch": 0.9390301515746902, "grad_norm": 1.7079237699508667, "learning_rate": 9.715767232798379e-08, "loss": 0.7747, "step": 26585 }, { "epoch": 0.9390654733783981, "grad_norm": 1.8642994165420532, "learning_rate": 9.70454891293754e-08, "loss": 0.7396, "step": 26586 }, { "epoch": 0.939100795182106, "grad_norm": 1.9680135250091553, "learning_rate": 9.693337009971438e-08, "loss": 0.7531, "step": 26587 }, { "epoch": 0.9391361169858139, "grad_norm": 1.6000463962554932, "learning_rate": 9.68213152404679e-08, "loss": 0.7741, "step": 26588 }, { "epoch": 0.9391714387895218, "grad_norm": 1.652545690536499, "learning_rate": 9.6709324553102e-08, "loss": 0.8102, "step": 26589 }, { "epoch": 0.9392067605932297, "grad_norm": 1.8773949146270752, "learning_rate": 9.659739803908275e-08, "loss": 0.7793, "step": 26590 }, { "epoch": 0.9392420823969376, "grad_norm": 1.8777060508728027, "learning_rate": 9.648553569987507e-08, "loss": 0.7352, "step": 26591 }, { "epoch": 0.9392774042006455, "grad_norm": 1.699723482131958, "learning_rate": 9.637373753694279e-08, "loss": 0.7711, "step": 26592 }, { "epoch": 0.9393127260043534, "grad_norm": 1.860237717628479, "learning_rate": 9.626200355174975e-08, "loss": 0.7758, "step": 26593 }, { "epoch": 0.9393480478080614, "grad_norm": 2.5519933700561523, "learning_rate": 9.615033374575755e-08, "loss": 0.7802, "step": 26594 }, { "epoch": 0.9393833696117693, "grad_norm": 1.8991613388061523, "learning_rate": 9.60387281204278e-08, "loss": 0.7728, "step": 26595 }, { "epoch": 0.9394186914154772, "grad_norm": 1.7228152751922607, "learning_rate": 9.592718667722156e-08, "loss": 0.7394, "step": 26596 }, { "epoch": 0.9394540132191851, "grad_norm": 1.7384426593780518, "learning_rate": 9.581570941759821e-08, "loss": 0.7579, "step": 26597 }, { "epoch": 0.9394893350228929, "grad_norm": 1.614322543144226, "learning_rate": 9.570429634301659e-08, "loss": 0.7227, "step": 26598 }, { "epoch": 0.9395246568266008, "grad_norm": 1.7955931425094604, "learning_rate": 9.559294745493608e-08, "loss": 0.6997, "step": 26599 }, { "epoch": 0.9395599786303087, "grad_norm": 1.9755672216415405, "learning_rate": 9.548166275481274e-08, "loss": 0.7446, "step": 26600 }, { "epoch": 0.9395953004340166, "grad_norm": 1.595977783203125, "learning_rate": 9.537044224410318e-08, "loss": 0.7497, "step": 26601 }, { "epoch": 0.9396306222377245, "grad_norm": 1.793874740600586, "learning_rate": 9.525928592426403e-08, "loss": 0.7593, "step": 26602 }, { "epoch": 0.9396659440414324, "grad_norm": 1.613867163658142, "learning_rate": 9.514819379674911e-08, "loss": 0.7597, "step": 26603 }, { "epoch": 0.9397012658451404, "grad_norm": 1.7155673503875732, "learning_rate": 9.503716586301226e-08, "loss": 0.7524, "step": 26604 }, { "epoch": 0.9397365876488483, "grad_norm": 1.7139474153518677, "learning_rate": 9.492620212450732e-08, "loss": 0.7643, "step": 26605 }, { "epoch": 0.9397719094525562, "grad_norm": 1.6281393766403198, "learning_rate": 9.481530258268589e-08, "loss": 0.7681, "step": 26606 }, { "epoch": 0.9398072312562641, "grad_norm": 1.8468923568725586, "learning_rate": 9.470446723900018e-08, "loss": 0.7496, "step": 26607 }, { "epoch": 0.939842553059972, "grad_norm": 1.7088721990585327, "learning_rate": 9.459369609489955e-08, "loss": 0.7761, "step": 26608 }, { "epoch": 0.9398778748636799, "grad_norm": 1.7343111038208008, "learning_rate": 9.448298915183508e-08, "loss": 0.7249, "step": 26609 }, { "epoch": 0.9399131966673878, "grad_norm": 1.7524679899215698, "learning_rate": 9.437234641125503e-08, "loss": 0.772, "step": 26610 }, { "epoch": 0.9399485184710957, "grad_norm": 1.58749520778656, "learning_rate": 9.426176787460717e-08, "loss": 0.7772, "step": 26611 }, { "epoch": 0.9399838402748036, "grad_norm": 1.6469647884368896, "learning_rate": 9.415125354333976e-08, "loss": 0.7995, "step": 26612 }, { "epoch": 0.9400191620785115, "grad_norm": 2.1950507164001465, "learning_rate": 9.404080341889776e-08, "loss": 0.7614, "step": 26613 }, { "epoch": 0.9400544838822195, "grad_norm": 1.6634033918380737, "learning_rate": 9.39304175027278e-08, "loss": 0.7514, "step": 26614 }, { "epoch": 0.9400898056859274, "grad_norm": 1.9205528497695923, "learning_rate": 9.382009579627482e-08, "loss": 0.7865, "step": 26615 }, { "epoch": 0.9401251274896353, "grad_norm": 1.750283122062683, "learning_rate": 9.370983830098157e-08, "loss": 0.7323, "step": 26616 }, { "epoch": 0.9401604492933432, "grad_norm": 1.7084510326385498, "learning_rate": 9.359964501829189e-08, "loss": 0.7656, "step": 26617 }, { "epoch": 0.9401957710970511, "grad_norm": 1.7062443494796753, "learning_rate": 9.348951594964739e-08, "loss": 0.7604, "step": 26618 }, { "epoch": 0.940231092900759, "grad_norm": 1.6557883024215698, "learning_rate": 9.337945109649083e-08, "loss": 0.7367, "step": 26619 }, { "epoch": 0.9402664147044669, "grad_norm": 1.7667824029922485, "learning_rate": 9.326945046026048e-08, "loss": 0.751, "step": 26620 }, { "epoch": 0.9403017365081748, "grad_norm": 1.6137923002243042, "learning_rate": 9.315951404239798e-08, "loss": 0.773, "step": 26621 }, { "epoch": 0.9403370583118827, "grad_norm": 1.6414178609848022, "learning_rate": 9.304964184434162e-08, "loss": 0.7563, "step": 26622 }, { "epoch": 0.9403723801155907, "grad_norm": 1.6908824443817139, "learning_rate": 9.293983386752859e-08, "loss": 0.767, "step": 26623 }, { "epoch": 0.9404077019192985, "grad_norm": 1.612000584602356, "learning_rate": 9.283009011339606e-08, "loss": 0.7413, "step": 26624 }, { "epoch": 0.9404430237230064, "grad_norm": 1.6745305061340332, "learning_rate": 9.272041058338177e-08, "loss": 0.7814, "step": 26625 }, { "epoch": 0.9404783455267143, "grad_norm": 1.88188636302948, "learning_rate": 9.261079527892015e-08, "loss": 0.7437, "step": 26626 }, { "epoch": 0.9405136673304222, "grad_norm": 2.142843008041382, "learning_rate": 9.250124420144557e-08, "loss": 0.7519, "step": 26627 }, { "epoch": 0.9405489891341301, "grad_norm": 1.7026779651641846, "learning_rate": 9.239175735239191e-08, "loss": 0.7779, "step": 26628 }, { "epoch": 0.940584310937838, "grad_norm": 1.769621729850769, "learning_rate": 9.228233473319303e-08, "loss": 0.7563, "step": 26629 }, { "epoch": 0.9406196327415459, "grad_norm": 1.8160202503204346, "learning_rate": 9.217297634528055e-08, "loss": 0.7438, "step": 26630 }, { "epoch": 0.9406549545452538, "grad_norm": 1.8808544874191284, "learning_rate": 9.206368219008499e-08, "loss": 0.807, "step": 26631 }, { "epoch": 0.9406902763489617, "grad_norm": 1.925182819366455, "learning_rate": 9.195445226903798e-08, "loss": 0.7692, "step": 26632 }, { "epoch": 0.9407255981526697, "grad_norm": 1.5278946161270142, "learning_rate": 9.184528658356784e-08, "loss": 0.7621, "step": 26633 }, { "epoch": 0.9407609199563776, "grad_norm": 1.5755140781402588, "learning_rate": 9.173618513510396e-08, "loss": 0.7247, "step": 26634 }, { "epoch": 0.9407962417600855, "grad_norm": 1.6032058000564575, "learning_rate": 9.162714792507465e-08, "loss": 0.7542, "step": 26635 }, { "epoch": 0.9408315635637934, "grad_norm": 1.803924322128296, "learning_rate": 9.151817495490656e-08, "loss": 0.7491, "step": 26636 }, { "epoch": 0.9408668853675013, "grad_norm": 1.569283127784729, "learning_rate": 9.140926622602575e-08, "loss": 0.761, "step": 26637 }, { "epoch": 0.9409022071712092, "grad_norm": 1.742056131362915, "learning_rate": 9.130042173985776e-08, "loss": 0.7532, "step": 26638 }, { "epoch": 0.9409375289749171, "grad_norm": 1.6866216659545898, "learning_rate": 9.1191641497827e-08, "loss": 0.7734, "step": 26639 }, { "epoch": 0.940972850778625, "grad_norm": 1.77774977684021, "learning_rate": 9.108292550135789e-08, "loss": 0.7306, "step": 26640 }, { "epoch": 0.9410081725823329, "grad_norm": 2.0146214962005615, "learning_rate": 9.097427375187207e-08, "loss": 0.795, "step": 26641 }, { "epoch": 0.9410434943860408, "grad_norm": 1.742630958557129, "learning_rate": 9.086568625079228e-08, "loss": 0.7716, "step": 26642 }, { "epoch": 0.9410788161897488, "grad_norm": 1.5638277530670166, "learning_rate": 9.075716299953963e-08, "loss": 0.7252, "step": 26643 }, { "epoch": 0.9411141379934567, "grad_norm": 1.7320361137390137, "learning_rate": 9.064870399953407e-08, "loss": 0.7216, "step": 26644 }, { "epoch": 0.9411494597971646, "grad_norm": 2.0824501514434814, "learning_rate": 9.054030925219614e-08, "loss": 0.7696, "step": 26645 }, { "epoch": 0.9411847816008725, "grad_norm": 1.5661122798919678, "learning_rate": 9.043197875894305e-08, "loss": 0.7553, "step": 26646 }, { "epoch": 0.9412201034045804, "grad_norm": 1.652647852897644, "learning_rate": 9.032371252119365e-08, "loss": 0.7607, "step": 26647 }, { "epoch": 0.9412554252082883, "grad_norm": 1.8377152681350708, "learning_rate": 9.021551054036459e-08, "loss": 0.7795, "step": 26648 }, { "epoch": 0.9412907470119962, "grad_norm": 1.6638693809509277, "learning_rate": 9.010737281787196e-08, "loss": 0.7572, "step": 26649 }, { "epoch": 0.941326068815704, "grad_norm": 1.7297348976135254, "learning_rate": 8.999929935513074e-08, "loss": 0.7456, "step": 26650 }, { "epoch": 0.9413613906194119, "grad_norm": 1.6778820753097534, "learning_rate": 8.989129015355646e-08, "loss": 0.7444, "step": 26651 }, { "epoch": 0.9413967124231198, "grad_norm": 1.7759073972702026, "learning_rate": 8.978334521456133e-08, "loss": 0.769, "step": 26652 }, { "epoch": 0.9414320342268278, "grad_norm": 2.0865237712860107, "learning_rate": 8.967546453955867e-08, "loss": 0.785, "step": 26653 }, { "epoch": 0.9414673560305357, "grad_norm": 1.648375391960144, "learning_rate": 8.956764812996121e-08, "loss": 0.7525, "step": 26654 }, { "epoch": 0.9415026778342436, "grad_norm": 2.178650140762329, "learning_rate": 8.945989598717841e-08, "loss": 0.772, "step": 26655 }, { "epoch": 0.9415379996379515, "grad_norm": 1.8133164644241333, "learning_rate": 8.93522081126219e-08, "loss": 0.7871, "step": 26656 }, { "epoch": 0.9415733214416594, "grad_norm": 1.9024708271026611, "learning_rate": 8.924458450770112e-08, "loss": 0.7317, "step": 26657 }, { "epoch": 0.9416086432453673, "grad_norm": 1.7246222496032715, "learning_rate": 8.913702517382328e-08, "loss": 0.7542, "step": 26658 }, { "epoch": 0.9416439650490752, "grad_norm": 1.6618667840957642, "learning_rate": 8.902953011239724e-08, "loss": 0.7575, "step": 26659 }, { "epoch": 0.9416792868527831, "grad_norm": 1.6802728176116943, "learning_rate": 8.892209932482965e-08, "loss": 0.7773, "step": 26660 }, { "epoch": 0.941714608656491, "grad_norm": 1.7547930479049683, "learning_rate": 8.881473281252605e-08, "loss": 0.7799, "step": 26661 }, { "epoch": 0.941749930460199, "grad_norm": 1.595116376876831, "learning_rate": 8.870743057689257e-08, "loss": 0.7182, "step": 26662 }, { "epoch": 0.9417852522639069, "grad_norm": 2.1720848083496094, "learning_rate": 8.86001926193325e-08, "loss": 0.7247, "step": 26663 }, { "epoch": 0.9418205740676148, "grad_norm": 1.6180126667022705, "learning_rate": 8.849301894125028e-08, "loss": 0.7711, "step": 26664 }, { "epoch": 0.9418558958713227, "grad_norm": 2.626427173614502, "learning_rate": 8.838590954404758e-08, "loss": 0.759, "step": 26665 }, { "epoch": 0.9418912176750306, "grad_norm": 1.816320776939392, "learning_rate": 8.827886442912714e-08, "loss": 0.7675, "step": 26666 }, { "epoch": 0.9419265394787385, "grad_norm": 1.6738882064819336, "learning_rate": 8.817188359789008e-08, "loss": 0.7613, "step": 26667 }, { "epoch": 0.9419618612824464, "grad_norm": 1.6495047807693481, "learning_rate": 8.806496705173528e-08, "loss": 0.7404, "step": 26668 }, { "epoch": 0.9419971830861543, "grad_norm": 1.5484766960144043, "learning_rate": 8.795811479206329e-08, "loss": 0.7661, "step": 26669 }, { "epoch": 0.9420325048898622, "grad_norm": 1.89218008518219, "learning_rate": 8.785132682027243e-08, "loss": 0.7141, "step": 26670 }, { "epoch": 0.9420678266935701, "grad_norm": 1.7738043069839478, "learning_rate": 8.774460313775935e-08, "loss": 0.7457, "step": 26671 }, { "epoch": 0.942103148497278, "grad_norm": 1.9417906999588013, "learning_rate": 8.763794374592183e-08, "loss": 0.7737, "step": 26672 }, { "epoch": 0.942138470300986, "grad_norm": 3.7530040740966797, "learning_rate": 8.753134864615543e-08, "loss": 0.7766, "step": 26673 }, { "epoch": 0.9421737921046939, "grad_norm": 2.008835554122925, "learning_rate": 8.742481783985513e-08, "loss": 0.7412, "step": 26674 }, { "epoch": 0.9422091139084018, "grad_norm": 1.7808072566986084, "learning_rate": 8.731835132841593e-08, "loss": 0.7709, "step": 26675 }, { "epoch": 0.9422444357121096, "grad_norm": 1.683688759803772, "learning_rate": 8.721194911323005e-08, "loss": 0.7689, "step": 26676 }, { "epoch": 0.9422797575158175, "grad_norm": 1.7555029392242432, "learning_rate": 8.710561119569139e-08, "loss": 0.7446, "step": 26677 }, { "epoch": 0.9423150793195254, "grad_norm": 1.7129762172698975, "learning_rate": 8.699933757719047e-08, "loss": 0.7644, "step": 26678 }, { "epoch": 0.9423504011232333, "grad_norm": 1.6437381505966187, "learning_rate": 8.689312825911844e-08, "loss": 0.7925, "step": 26679 }, { "epoch": 0.9423857229269412, "grad_norm": 1.6027995347976685, "learning_rate": 8.678698324286638e-08, "loss": 0.762, "step": 26680 }, { "epoch": 0.9424210447306491, "grad_norm": 1.6052356958389282, "learning_rate": 8.668090252982153e-08, "loss": 0.7721, "step": 26681 }, { "epoch": 0.942456366534357, "grad_norm": 1.7267152070999146, "learning_rate": 8.657488612137444e-08, "loss": 0.7431, "step": 26682 }, { "epoch": 0.942491688338065, "grad_norm": 2.0040335655212402, "learning_rate": 8.64689340189112e-08, "loss": 0.7814, "step": 26683 }, { "epoch": 0.9425270101417729, "grad_norm": 1.6478877067565918, "learning_rate": 8.636304622381908e-08, "loss": 0.7616, "step": 26684 }, { "epoch": 0.9425623319454808, "grad_norm": 1.6756149530410767, "learning_rate": 8.625722273748305e-08, "loss": 0.7766, "step": 26685 }, { "epoch": 0.9425976537491887, "grad_norm": 1.6246176958084106, "learning_rate": 8.615146356128923e-08, "loss": 0.7674, "step": 26686 }, { "epoch": 0.9426329755528966, "grad_norm": 1.7886806726455688, "learning_rate": 8.604576869662207e-08, "loss": 0.757, "step": 26687 }, { "epoch": 0.9426682973566045, "grad_norm": 1.6701418161392212, "learning_rate": 8.594013814486324e-08, "loss": 0.7623, "step": 26688 }, { "epoch": 0.9427036191603124, "grad_norm": 1.674301028251648, "learning_rate": 8.583457190739664e-08, "loss": 0.7403, "step": 26689 }, { "epoch": 0.9427389409640203, "grad_norm": 2.0757553577423096, "learning_rate": 8.572906998560338e-08, "loss": 0.7452, "step": 26690 }, { "epoch": 0.9427742627677282, "grad_norm": 1.709233045578003, "learning_rate": 8.562363238086403e-08, "loss": 0.748, "step": 26691 }, { "epoch": 0.9428095845714362, "grad_norm": 1.6339714527130127, "learning_rate": 8.551825909455857e-08, "loss": 0.8132, "step": 26692 }, { "epoch": 0.9428449063751441, "grad_norm": 1.5992693901062012, "learning_rate": 8.541295012806706e-08, "loss": 0.7422, "step": 26693 }, { "epoch": 0.942880228178852, "grad_norm": 1.7426731586456299, "learning_rate": 8.530770548276612e-08, "loss": 0.7559, "step": 26694 }, { "epoch": 0.9429155499825599, "grad_norm": 1.7393302917480469, "learning_rate": 8.520252516003468e-08, "loss": 0.7798, "step": 26695 }, { "epoch": 0.9429508717862678, "grad_norm": 1.8265280723571777, "learning_rate": 8.509740916124887e-08, "loss": 0.7884, "step": 26696 }, { "epoch": 0.9429861935899757, "grad_norm": 1.0552866458892822, "learning_rate": 8.499235748778422e-08, "loss": 0.5781, "step": 26697 }, { "epoch": 0.9430215153936836, "grad_norm": 1.6766871213912964, "learning_rate": 8.488737014101523e-08, "loss": 0.7478, "step": 26698 }, { "epoch": 0.9430568371973915, "grad_norm": 1.7114601135253906, "learning_rate": 8.478244712231631e-08, "loss": 0.7645, "step": 26699 }, { "epoch": 0.9430921590010994, "grad_norm": 1.7133221626281738, "learning_rate": 8.46775884330614e-08, "loss": 0.7507, "step": 26700 }, { "epoch": 0.9431274808048073, "grad_norm": 1.7180088758468628, "learning_rate": 8.45727940746216e-08, "loss": 0.7592, "step": 26701 }, { "epoch": 0.9431628026085151, "grad_norm": 1.6159334182739258, "learning_rate": 8.446806404836916e-08, "loss": 0.741, "step": 26702 }, { "epoch": 0.9431981244122231, "grad_norm": 1.6131430864334106, "learning_rate": 8.43633983556752e-08, "loss": 0.747, "step": 26703 }, { "epoch": 0.943233446215931, "grad_norm": 1.5580480098724365, "learning_rate": 8.425879699790861e-08, "loss": 0.7592, "step": 26704 }, { "epoch": 0.9432687680196389, "grad_norm": 2.0927326679229736, "learning_rate": 8.41542599764389e-08, "loss": 0.7456, "step": 26705 }, { "epoch": 0.9433040898233468, "grad_norm": 0.8800868988037109, "learning_rate": 8.404978729263436e-08, "loss": 0.5752, "step": 26706 }, { "epoch": 0.9433394116270547, "grad_norm": 1.8354310989379883, "learning_rate": 8.39453789478617e-08, "loss": 0.7624, "step": 26707 }, { "epoch": 0.9433747334307626, "grad_norm": 1.837100863456726, "learning_rate": 8.384103494348817e-08, "loss": 0.7609, "step": 26708 }, { "epoch": 0.9434100552344705, "grad_norm": 1.838816523551941, "learning_rate": 8.373675528087876e-08, "loss": 0.7397, "step": 26709 }, { "epoch": 0.9434453770381784, "grad_norm": 1.644523024559021, "learning_rate": 8.363253996139908e-08, "loss": 0.7581, "step": 26710 }, { "epoch": 0.9434806988418863, "grad_norm": 6.57502555847168, "learning_rate": 8.352838898641191e-08, "loss": 0.7333, "step": 26711 }, { "epoch": 0.9435160206455943, "grad_norm": 1.6185638904571533, "learning_rate": 8.342430235728116e-08, "loss": 0.7845, "step": 26712 }, { "epoch": 0.9435513424493022, "grad_norm": 1.5972312688827515, "learning_rate": 8.332028007536908e-08, "loss": 0.7673, "step": 26713 }, { "epoch": 0.9435866642530101, "grad_norm": 1.6825557947158813, "learning_rate": 8.321632214203678e-08, "loss": 0.7541, "step": 26714 }, { "epoch": 0.943621986056718, "grad_norm": 1.7887805700302124, "learning_rate": 8.311242855864543e-08, "loss": 0.7787, "step": 26715 }, { "epoch": 0.9436573078604259, "grad_norm": 1.663629412651062, "learning_rate": 8.300859932655391e-08, "loss": 0.7508, "step": 26716 }, { "epoch": 0.9436926296641338, "grad_norm": 1.6330305337905884, "learning_rate": 8.290483444712171e-08, "loss": 0.7219, "step": 26717 }, { "epoch": 0.9437279514678417, "grad_norm": 5.651036739349365, "learning_rate": 8.280113392170608e-08, "loss": 0.7879, "step": 26718 }, { "epoch": 0.9437632732715496, "grad_norm": 1.7056273221969604, "learning_rate": 8.269749775166591e-08, "loss": 0.729, "step": 26719 }, { "epoch": 0.9437985950752575, "grad_norm": 1.7408572435379028, "learning_rate": 8.259392593835624e-08, "loss": 0.759, "step": 26720 }, { "epoch": 0.9438339168789655, "grad_norm": 1.7862145900726318, "learning_rate": 8.249041848313211e-08, "loss": 0.8021, "step": 26721 }, { "epoch": 0.9438692386826734, "grad_norm": 1.988420844078064, "learning_rate": 8.238697538735019e-08, "loss": 0.7764, "step": 26722 }, { "epoch": 0.9439045604863813, "grad_norm": 2.8485748767852783, "learning_rate": 8.228359665236219e-08, "loss": 0.733, "step": 26723 }, { "epoch": 0.9439398822900892, "grad_norm": 1.7936638593673706, "learning_rate": 8.218028227952257e-08, "loss": 0.7685, "step": 26724 }, { "epoch": 0.9439752040937971, "grad_norm": 2.9457309246063232, "learning_rate": 8.207703227018305e-08, "loss": 0.7906, "step": 26725 }, { "epoch": 0.944010525897505, "grad_norm": 1.662734866142273, "learning_rate": 8.197384662569419e-08, "loss": 0.7704, "step": 26726 }, { "epoch": 0.9440458477012129, "grad_norm": 1.5405179262161255, "learning_rate": 8.187072534740769e-08, "loss": 0.7598, "step": 26727 }, { "epoch": 0.9440811695049207, "grad_norm": 1.7734793424606323, "learning_rate": 8.17676684366725e-08, "loss": 0.7802, "step": 26728 }, { "epoch": 0.9441164913086286, "grad_norm": 1.7215991020202637, "learning_rate": 8.166467589483751e-08, "loss": 0.8101, "step": 26729 }, { "epoch": 0.9441518131123365, "grad_norm": 1.7666537761688232, "learning_rate": 8.156174772325054e-08, "loss": 0.7918, "step": 26730 }, { "epoch": 0.9441871349160444, "grad_norm": 1.8630844354629517, "learning_rate": 8.14588839232594e-08, "loss": 0.7927, "step": 26731 }, { "epoch": 0.9442224567197524, "grad_norm": 1.6350922584533691, "learning_rate": 8.135608449620914e-08, "loss": 0.757, "step": 26732 }, { "epoch": 0.9442577785234603, "grad_norm": 1.6065346002578735, "learning_rate": 8.125334944344643e-08, "loss": 0.7351, "step": 26733 }, { "epoch": 0.9442931003271682, "grad_norm": 1.7202118635177612, "learning_rate": 8.115067876631466e-08, "loss": 0.7511, "step": 26734 }, { "epoch": 0.9443284221308761, "grad_norm": 1.836205244064331, "learning_rate": 8.104807246615886e-08, "loss": 0.7531, "step": 26735 }, { "epoch": 0.944363743934584, "grad_norm": 1.6111279726028442, "learning_rate": 8.094553054432075e-08, "loss": 0.762, "step": 26736 }, { "epoch": 0.9443990657382919, "grad_norm": 1.6884573698043823, "learning_rate": 8.084305300214257e-08, "loss": 0.8053, "step": 26737 }, { "epoch": 0.9444343875419998, "grad_norm": 2.080629348754883, "learning_rate": 8.074063984096659e-08, "loss": 0.7451, "step": 26738 }, { "epoch": 0.9444697093457077, "grad_norm": 1.7445456981658936, "learning_rate": 8.063829106213173e-08, "loss": 0.7554, "step": 26739 }, { "epoch": 0.9445050311494156, "grad_norm": 1.4839057922363281, "learning_rate": 8.053600666697802e-08, "loss": 0.7336, "step": 26740 }, { "epoch": 0.9445403529531236, "grad_norm": 1.860956072807312, "learning_rate": 8.043378665684498e-08, "loss": 0.7706, "step": 26741 }, { "epoch": 0.9445756747568315, "grad_norm": 1.7783042192459106, "learning_rate": 8.033163103306984e-08, "loss": 0.7598, "step": 26742 }, { "epoch": 0.9446109965605394, "grad_norm": 1.7355023622512817, "learning_rate": 8.022953979698878e-08, "loss": 0.7547, "step": 26743 }, { "epoch": 0.9446463183642473, "grad_norm": 1.6498693227767944, "learning_rate": 8.012751294993904e-08, "loss": 0.7769, "step": 26744 }, { "epoch": 0.9446816401679552, "grad_norm": 2.4814982414245605, "learning_rate": 8.002555049325566e-08, "loss": 0.8031, "step": 26745 }, { "epoch": 0.9447169619716631, "grad_norm": 1.716283917427063, "learning_rate": 7.992365242827316e-08, "loss": 0.7678, "step": 26746 }, { "epoch": 0.944752283775371, "grad_norm": 1.7832247018814087, "learning_rate": 7.982181875632433e-08, "loss": 0.7885, "step": 26747 }, { "epoch": 0.9447876055790789, "grad_norm": 1.7139345407485962, "learning_rate": 7.972004947874368e-08, "loss": 0.7517, "step": 26748 }, { "epoch": 0.9448229273827868, "grad_norm": 1.577532172203064, "learning_rate": 7.961834459686125e-08, "loss": 0.7387, "step": 26749 }, { "epoch": 0.9448582491864947, "grad_norm": 1.7216500043869019, "learning_rate": 7.951670411200929e-08, "loss": 0.7637, "step": 26750 }, { "epoch": 0.9448935709902027, "grad_norm": 1.6589939594268799, "learning_rate": 7.941512802551843e-08, "loss": 0.7891, "step": 26751 }, { "epoch": 0.9449288927939106, "grad_norm": 1.8204318284988403, "learning_rate": 7.931361633871649e-08, "loss": 0.7606, "step": 26752 }, { "epoch": 0.9449642145976185, "grad_norm": 1.7414958477020264, "learning_rate": 7.921216905293294e-08, "loss": 0.8046, "step": 26753 }, { "epoch": 0.9449995364013263, "grad_norm": 1.7859320640563965, "learning_rate": 7.911078616949564e-08, "loss": 0.7363, "step": 26754 }, { "epoch": 0.9450348582050342, "grad_norm": 2.113847494125366, "learning_rate": 7.900946768973184e-08, "loss": 0.7378, "step": 26755 }, { "epoch": 0.9450701800087421, "grad_norm": 1.9512505531311035, "learning_rate": 7.890821361496659e-08, "loss": 0.7402, "step": 26756 }, { "epoch": 0.94510550181245, "grad_norm": 1.6903231143951416, "learning_rate": 7.88070239465255e-08, "loss": 0.7997, "step": 26757 }, { "epoch": 0.9451408236161579, "grad_norm": 1.7319583892822266, "learning_rate": 7.870589868573365e-08, "loss": 0.7695, "step": 26758 }, { "epoch": 0.9451761454198658, "grad_norm": 1.7108235359191895, "learning_rate": 7.860483783391327e-08, "loss": 0.7976, "step": 26759 }, { "epoch": 0.9452114672235737, "grad_norm": 1.7395901679992676, "learning_rate": 7.850384139238775e-08, "loss": 0.7613, "step": 26760 }, { "epoch": 0.9452467890272817, "grad_norm": 1.7731218338012695, "learning_rate": 7.84029093624794e-08, "loss": 0.7639, "step": 26761 }, { "epoch": 0.9452821108309896, "grad_norm": 1.8299968242645264, "learning_rate": 7.830204174550771e-08, "loss": 0.7759, "step": 26762 }, { "epoch": 0.9453174326346975, "grad_norm": 1.645835280418396, "learning_rate": 7.820123854279437e-08, "loss": 0.7576, "step": 26763 }, { "epoch": 0.9453527544384054, "grad_norm": 1.6613818407058716, "learning_rate": 7.810049975565781e-08, "loss": 0.778, "step": 26764 }, { "epoch": 0.9453880762421133, "grad_norm": 1.6551604270935059, "learning_rate": 7.79998253854164e-08, "loss": 0.7208, "step": 26765 }, { "epoch": 0.9454233980458212, "grad_norm": 1.5645980834960938, "learning_rate": 7.789921543338852e-08, "loss": 0.7501, "step": 26766 }, { "epoch": 0.9454587198495291, "grad_norm": 1.6441819667816162, "learning_rate": 7.779866990089036e-08, "loss": 0.7482, "step": 26767 }, { "epoch": 0.945494041653237, "grad_norm": 1.5218054056167603, "learning_rate": 7.769818878923751e-08, "loss": 0.7371, "step": 26768 }, { "epoch": 0.9455293634569449, "grad_norm": 1.9274296760559082, "learning_rate": 7.759777209974617e-08, "loss": 0.7756, "step": 26769 }, { "epoch": 0.9455646852606528, "grad_norm": 1.8165432214736938, "learning_rate": 7.749741983372972e-08, "loss": 0.7784, "step": 26770 }, { "epoch": 0.9456000070643608, "grad_norm": 1.6864012479782104, "learning_rate": 7.739713199250154e-08, "loss": 0.7508, "step": 26771 }, { "epoch": 0.9456353288680687, "grad_norm": 2.3239901065826416, "learning_rate": 7.729690857737392e-08, "loss": 0.7821, "step": 26772 }, { "epoch": 0.9456706506717766, "grad_norm": 1.6944862604141235, "learning_rate": 7.719674958965917e-08, "loss": 0.7459, "step": 26773 }, { "epoch": 0.9457059724754845, "grad_norm": 1.6092476844787598, "learning_rate": 7.709665503066843e-08, "loss": 0.7497, "step": 26774 }, { "epoch": 0.9457412942791924, "grad_norm": 1.8312777280807495, "learning_rate": 7.699662490171122e-08, "loss": 0.7276, "step": 26775 }, { "epoch": 0.9457766160829003, "grad_norm": 2.3396904468536377, "learning_rate": 7.68966592040965e-08, "loss": 0.7862, "step": 26776 }, { "epoch": 0.9458119378866082, "grad_norm": 1.7123500108718872, "learning_rate": 7.679675793913321e-08, "loss": 0.7653, "step": 26777 }, { "epoch": 0.9458472596903161, "grad_norm": 1.6339740753173828, "learning_rate": 7.66969211081281e-08, "loss": 0.717, "step": 26778 }, { "epoch": 0.945882581494024, "grad_norm": 1.9091428518295288, "learning_rate": 7.659714871238788e-08, "loss": 0.8015, "step": 26779 }, { "epoch": 0.9459179032977318, "grad_norm": 1.7255353927612305, "learning_rate": 7.64974407532193e-08, "loss": 0.7595, "step": 26780 }, { "epoch": 0.9459532251014398, "grad_norm": 1.7401262521743774, "learning_rate": 7.63977972319263e-08, "loss": 0.7592, "step": 26781 }, { "epoch": 0.9459885469051477, "grad_norm": 1.65879225730896, "learning_rate": 7.629821814981342e-08, "loss": 0.7889, "step": 26782 }, { "epoch": 0.9460238687088556, "grad_norm": 1.6990342140197754, "learning_rate": 7.619870350818459e-08, "loss": 0.7572, "step": 26783 }, { "epoch": 0.9460591905125635, "grad_norm": 1.768803358078003, "learning_rate": 7.6099253308341e-08, "loss": 0.7672, "step": 26784 }, { "epoch": 0.9460945123162714, "grad_norm": 1.6197514533996582, "learning_rate": 7.59998675515844e-08, "loss": 0.746, "step": 26785 }, { "epoch": 0.9461298341199793, "grad_norm": 1.7378880977630615, "learning_rate": 7.590054623921705e-08, "loss": 0.7705, "step": 26786 }, { "epoch": 0.9461651559236872, "grad_norm": 1.8172245025634766, "learning_rate": 7.580128937253684e-08, "loss": 0.7301, "step": 26787 }, { "epoch": 0.9462004777273951, "grad_norm": 1.6137452125549316, "learning_rate": 7.57020969528438e-08, "loss": 0.7963, "step": 26788 }, { "epoch": 0.946235799531103, "grad_norm": 1.6774579286575317, "learning_rate": 7.560296898143637e-08, "loss": 0.7561, "step": 26789 }, { "epoch": 0.946271121334811, "grad_norm": 1.7690213918685913, "learning_rate": 7.550390545961129e-08, "loss": 0.7547, "step": 26790 }, { "epoch": 0.9463064431385189, "grad_norm": 1.7145951986312866, "learning_rate": 7.540490638866527e-08, "loss": 0.7377, "step": 26791 }, { "epoch": 0.9463417649422268, "grad_norm": 1.59676194190979, "learning_rate": 7.530597176989452e-08, "loss": 0.761, "step": 26792 }, { "epoch": 0.9463770867459347, "grad_norm": 1.701102375984192, "learning_rate": 7.520710160459355e-08, "loss": 0.7458, "step": 26793 }, { "epoch": 0.9464124085496426, "grad_norm": 2.2355058193206787, "learning_rate": 7.510829589405577e-08, "loss": 0.7809, "step": 26794 }, { "epoch": 0.9464477303533505, "grad_norm": 1.6359213590621948, "learning_rate": 7.50095546395746e-08, "loss": 0.7562, "step": 26795 }, { "epoch": 0.9464830521570584, "grad_norm": 2.034074544906616, "learning_rate": 7.491087784244344e-08, "loss": 0.7958, "step": 26796 }, { "epoch": 0.9465183739607663, "grad_norm": 1.730062484741211, "learning_rate": 7.481226550395238e-08, "loss": 0.7335, "step": 26797 }, { "epoch": 0.9465536957644742, "grad_norm": 1.7167222499847412, "learning_rate": 7.471371762539258e-08, "loss": 0.7593, "step": 26798 }, { "epoch": 0.9465890175681821, "grad_norm": 1.6173300743103027, "learning_rate": 7.461523420805416e-08, "loss": 0.7761, "step": 26799 }, { "epoch": 0.9466243393718901, "grad_norm": 1.6534957885742188, "learning_rate": 7.451681525322551e-08, "loss": 0.7502, "step": 26800 }, { "epoch": 0.946659661175598, "grad_norm": 1.5848379135131836, "learning_rate": 7.44184607621945e-08, "loss": 0.7503, "step": 26801 }, { "epoch": 0.9466949829793059, "grad_norm": 1.7806538343429565, "learning_rate": 7.432017073624898e-08, "loss": 0.7272, "step": 26802 }, { "epoch": 0.9467303047830138, "grad_norm": 1.7971662282943726, "learning_rate": 7.422194517667514e-08, "loss": 0.7827, "step": 26803 }, { "epoch": 0.9467656265867217, "grad_norm": 1.864479899406433, "learning_rate": 7.412378408475862e-08, "loss": 0.7428, "step": 26804 }, { "epoch": 0.9468009483904296, "grad_norm": 1.9668630361557007, "learning_rate": 7.402568746178396e-08, "loss": 0.785, "step": 26805 }, { "epoch": 0.9468362701941374, "grad_norm": 1.8329445123672485, "learning_rate": 7.392765530903567e-08, "loss": 0.7757, "step": 26806 }, { "epoch": 0.9468715919978453, "grad_norm": 1.7133530378341675, "learning_rate": 7.382968762779552e-08, "loss": 0.7314, "step": 26807 }, { "epoch": 0.9469069138015532, "grad_norm": 1.7142890691757202, "learning_rate": 7.373178441934691e-08, "loss": 0.7384, "step": 26808 }, { "epoch": 0.9469422356052611, "grad_norm": 1.6552504301071167, "learning_rate": 7.363394568497051e-08, "loss": 0.7512, "step": 26809 }, { "epoch": 0.946977557408969, "grad_norm": 1.5776170492172241, "learning_rate": 7.353617142594638e-08, "loss": 0.7471, "step": 26810 }, { "epoch": 0.947012879212677, "grad_norm": 1.7862099409103394, "learning_rate": 7.343846164355518e-08, "loss": 0.7684, "step": 26811 }, { "epoch": 0.9470482010163849, "grad_norm": 1.7695904970169067, "learning_rate": 7.33408163390753e-08, "loss": 0.7389, "step": 26812 }, { "epoch": 0.9470835228200928, "grad_norm": 1.5522409677505493, "learning_rate": 7.32432355137852e-08, "loss": 0.7467, "step": 26813 }, { "epoch": 0.9471188446238007, "grad_norm": 1.845847487449646, "learning_rate": 7.314571916896108e-08, "loss": 0.7684, "step": 26814 }, { "epoch": 0.9471541664275086, "grad_norm": 1.790905475616455, "learning_rate": 7.30482673058791e-08, "loss": 0.7664, "step": 26815 }, { "epoch": 0.9471894882312165, "grad_norm": 1.6885387897491455, "learning_rate": 7.295087992581606e-08, "loss": 0.746, "step": 26816 }, { "epoch": 0.9472248100349244, "grad_norm": 1.6894326210021973, "learning_rate": 7.285355703004538e-08, "loss": 0.7898, "step": 26817 }, { "epoch": 0.9472601318386323, "grad_norm": 1.7073867321014404, "learning_rate": 7.275629861984102e-08, "loss": 0.7501, "step": 26818 }, { "epoch": 0.9472954536423402, "grad_norm": 1.7912609577178955, "learning_rate": 7.265910469647641e-08, "loss": 0.7533, "step": 26819 }, { "epoch": 0.9473307754460482, "grad_norm": 1.7134439945220947, "learning_rate": 7.256197526122278e-08, "loss": 0.758, "step": 26820 }, { "epoch": 0.9473660972497561, "grad_norm": 4.377477169036865, "learning_rate": 7.246491031535186e-08, "loss": 0.7697, "step": 26821 }, { "epoch": 0.947401419053464, "grad_norm": 1.663718581199646, "learning_rate": 7.236790986013431e-08, "loss": 0.7944, "step": 26822 }, { "epoch": 0.9474367408571719, "grad_norm": 2.1543338298797607, "learning_rate": 7.227097389683913e-08, "loss": 0.7711, "step": 26823 }, { "epoch": 0.9474720626608798, "grad_norm": 1.7451297044754028, "learning_rate": 7.217410242673473e-08, "loss": 0.7378, "step": 26824 }, { "epoch": 0.9475073844645877, "grad_norm": 1.5874228477478027, "learning_rate": 7.207729545108954e-08, "loss": 0.7386, "step": 26825 }, { "epoch": 0.9475427062682956, "grad_norm": 0.9518580436706543, "learning_rate": 7.19805529711709e-08, "loss": 0.574, "step": 26826 }, { "epoch": 0.9475780280720035, "grad_norm": 1.9349844455718994, "learning_rate": 7.188387498824334e-08, "loss": 0.7766, "step": 26827 }, { "epoch": 0.9476133498757114, "grad_norm": 1.7787526845932007, "learning_rate": 7.178726150357418e-08, "loss": 0.7412, "step": 26828 }, { "epoch": 0.9476486716794194, "grad_norm": 1.6976720094680786, "learning_rate": 7.169071251842685e-08, "loss": 0.7399, "step": 26829 }, { "epoch": 0.9476839934831273, "grad_norm": 1.7339578866958618, "learning_rate": 7.159422803406424e-08, "loss": 0.762, "step": 26830 }, { "epoch": 0.9477193152868352, "grad_norm": 2.0423152446746826, "learning_rate": 7.149780805175033e-08, "loss": 0.7999, "step": 26831 }, { "epoch": 0.9477546370905431, "grad_norm": 1.7884547710418701, "learning_rate": 7.140145257274689e-08, "loss": 0.7691, "step": 26832 }, { "epoch": 0.9477899588942509, "grad_norm": 2.387995719909668, "learning_rate": 7.130516159831458e-08, "loss": 0.7441, "step": 26833 }, { "epoch": 0.9478252806979588, "grad_norm": 1.7484813928604126, "learning_rate": 7.120893512971406e-08, "loss": 0.7737, "step": 26834 }, { "epoch": 0.9478606025016667, "grad_norm": 1.6988699436187744, "learning_rate": 7.111277316820431e-08, "loss": 0.7408, "step": 26835 }, { "epoch": 0.9478959243053746, "grad_norm": 1.8643772602081299, "learning_rate": 7.101667571504433e-08, "loss": 0.7304, "step": 26836 }, { "epoch": 0.9479312461090825, "grad_norm": 1.5459442138671875, "learning_rate": 7.092064277149091e-08, "loss": 0.7616, "step": 26837 }, { "epoch": 0.9479665679127904, "grad_norm": 1.5987823009490967, "learning_rate": 7.082467433880247e-08, "loss": 0.7765, "step": 26838 }, { "epoch": 0.9480018897164983, "grad_norm": 1.89188551902771, "learning_rate": 7.072877041823301e-08, "loss": 0.7615, "step": 26839 }, { "epoch": 0.9480372115202063, "grad_norm": 1.8776311874389648, "learning_rate": 7.06329310110393e-08, "loss": 0.7817, "step": 26840 }, { "epoch": 0.9480725333239142, "grad_norm": 1.5737910270690918, "learning_rate": 7.053715611847533e-08, "loss": 0.7519, "step": 26841 }, { "epoch": 0.9481078551276221, "grad_norm": 1.55905020236969, "learning_rate": 7.044144574179457e-08, "loss": 0.7349, "step": 26842 }, { "epoch": 0.94814317693133, "grad_norm": 4.343306064605713, "learning_rate": 7.034579988224932e-08, "loss": 0.7854, "step": 26843 }, { "epoch": 0.9481784987350379, "grad_norm": 1.6568716764450073, "learning_rate": 7.025021854109138e-08, "loss": 0.7441, "step": 26844 }, { "epoch": 0.9482138205387458, "grad_norm": 1.6046689748764038, "learning_rate": 7.015470171957251e-08, "loss": 0.7301, "step": 26845 }, { "epoch": 0.9482491423424537, "grad_norm": 2.15543532371521, "learning_rate": 7.005924941894115e-08, "loss": 0.7957, "step": 26846 }, { "epoch": 0.9482844641461616, "grad_norm": 1.575213074684143, "learning_rate": 6.996386164044855e-08, "loss": 0.7584, "step": 26847 }, { "epoch": 0.9483197859498695, "grad_norm": 1.9291425943374634, "learning_rate": 6.986853838534202e-08, "loss": 0.7671, "step": 26848 }, { "epoch": 0.9483551077535775, "grad_norm": 1.6819119453430176, "learning_rate": 6.97732796548689e-08, "loss": 0.7425, "step": 26849 }, { "epoch": 0.9483904295572854, "grad_norm": 1.6387972831726074, "learning_rate": 6.967808545027599e-08, "loss": 0.7478, "step": 26850 }, { "epoch": 0.9484257513609933, "grad_norm": 1.7707653045654297, "learning_rate": 6.95829557728106e-08, "loss": 0.7549, "step": 26851 }, { "epoch": 0.9484610731647012, "grad_norm": 1.5946640968322754, "learning_rate": 6.948789062371564e-08, "loss": 0.7242, "step": 26852 }, { "epoch": 0.9484963949684091, "grad_norm": 1.7521774768829346, "learning_rate": 6.939289000423677e-08, "loss": 0.7293, "step": 26853 }, { "epoch": 0.948531716772117, "grad_norm": 1.641066074371338, "learning_rate": 6.929795391561689e-08, "loss": 0.7682, "step": 26854 }, { "epoch": 0.9485670385758249, "grad_norm": 1.6355842351913452, "learning_rate": 6.920308235909834e-08, "loss": 0.7323, "step": 26855 }, { "epoch": 0.9486023603795328, "grad_norm": 1.698530673980713, "learning_rate": 6.910827533592346e-08, "loss": 0.7488, "step": 26856 }, { "epoch": 0.9486376821832407, "grad_norm": 1.8057427406311035, "learning_rate": 6.901353284733237e-08, "loss": 0.7646, "step": 26857 }, { "epoch": 0.9486730039869486, "grad_norm": 1.8294490575790405, "learning_rate": 6.891885489456462e-08, "loss": 0.7531, "step": 26858 }, { "epoch": 0.9487083257906564, "grad_norm": 1.6183676719665527, "learning_rate": 6.882424147886036e-08, "loss": 0.7305, "step": 26859 }, { "epoch": 0.9487436475943644, "grad_norm": 1.83164644241333, "learning_rate": 6.872969260145745e-08, "loss": 0.7852, "step": 26860 }, { "epoch": 0.9487789693980723, "grad_norm": 1.6852737665176392, "learning_rate": 6.863520826359383e-08, "loss": 0.7525, "step": 26861 }, { "epoch": 0.9488142912017802, "grad_norm": 1.8243824243545532, "learning_rate": 6.854078846650569e-08, "loss": 0.7745, "step": 26862 }, { "epoch": 0.9488496130054881, "grad_norm": 1.6628327369689941, "learning_rate": 6.844643321142818e-08, "loss": 0.7358, "step": 26863 }, { "epoch": 0.948884934809196, "grad_norm": 1.782747507095337, "learning_rate": 6.835214249959699e-08, "loss": 0.7354, "step": 26864 }, { "epoch": 0.9489202566129039, "grad_norm": 1.6117430925369263, "learning_rate": 6.825791633224554e-08, "loss": 0.7384, "step": 26865 }, { "epoch": 0.9489555784166118, "grad_norm": 1.5332669019699097, "learning_rate": 6.816375471060733e-08, "loss": 0.6905, "step": 26866 }, { "epoch": 0.9489909002203197, "grad_norm": 1.821996808052063, "learning_rate": 6.806965763591523e-08, "loss": 0.7494, "step": 26867 }, { "epoch": 0.9490262220240276, "grad_norm": 1.7156561613082886, "learning_rate": 6.797562510939992e-08, "loss": 0.771, "step": 26868 }, { "epoch": 0.9490615438277356, "grad_norm": 2.0363094806671143, "learning_rate": 6.788165713229266e-08, "loss": 0.767, "step": 26869 }, { "epoch": 0.9490968656314435, "grad_norm": 1.7833887338638306, "learning_rate": 6.778775370582303e-08, "loss": 0.7603, "step": 26870 }, { "epoch": 0.9491321874351514, "grad_norm": 1.6230844259262085, "learning_rate": 6.769391483122057e-08, "loss": 0.7578, "step": 26871 }, { "epoch": 0.9491675092388593, "grad_norm": 1.7892018556594849, "learning_rate": 6.760014050971264e-08, "loss": 0.767, "step": 26872 }, { "epoch": 0.9492028310425672, "grad_norm": 1.7787278890609741, "learning_rate": 6.750643074252717e-08, "loss": 0.7571, "step": 26873 }, { "epoch": 0.9492381528462751, "grad_norm": 1.7540769577026367, "learning_rate": 6.741278553089036e-08, "loss": 0.7281, "step": 26874 }, { "epoch": 0.949273474649983, "grad_norm": 2.763713836669922, "learning_rate": 6.731920487602738e-08, "loss": 0.7709, "step": 26875 }, { "epoch": 0.9493087964536909, "grad_norm": 1.6101112365722656, "learning_rate": 6.722568877916336e-08, "loss": 0.7554, "step": 26876 }, { "epoch": 0.9493441182573988, "grad_norm": 1.5953853130340576, "learning_rate": 6.713223724152285e-08, "loss": 0.7736, "step": 26877 }, { "epoch": 0.9493794400611067, "grad_norm": 1.6721978187561035, "learning_rate": 6.703885026432766e-08, "loss": 0.791, "step": 26878 }, { "epoch": 0.9494147618648147, "grad_norm": 1.6232925653457642, "learning_rate": 6.694552784880126e-08, "loss": 0.7248, "step": 26879 }, { "epoch": 0.9494500836685226, "grad_norm": 1.8240717649459839, "learning_rate": 6.685226999616434e-08, "loss": 0.7633, "step": 26880 }, { "epoch": 0.9494854054722305, "grad_norm": 1.6458531618118286, "learning_rate": 6.675907670763759e-08, "loss": 0.7972, "step": 26881 }, { "epoch": 0.9495207272759384, "grad_norm": 1.6330143213272095, "learning_rate": 6.666594798444059e-08, "loss": 0.7572, "step": 26882 }, { "epoch": 0.9495560490796463, "grad_norm": 1.7359488010406494, "learning_rate": 6.657288382779236e-08, "loss": 0.7588, "step": 26883 }, { "epoch": 0.9495913708833542, "grad_norm": 1.5692617893218994, "learning_rate": 6.647988423891083e-08, "loss": 0.7016, "step": 26884 }, { "epoch": 0.949626692687062, "grad_norm": 1.6311721801757812, "learning_rate": 6.638694921901278e-08, "loss": 0.7321, "step": 26885 }, { "epoch": 0.9496620144907699, "grad_norm": 1.6513906717300415, "learning_rate": 6.62940787693156e-08, "loss": 0.7625, "step": 26886 }, { "epoch": 0.9496973362944778, "grad_norm": 1.5932984352111816, "learning_rate": 6.620127289103384e-08, "loss": 0.7537, "step": 26887 }, { "epoch": 0.9497326580981857, "grad_norm": 1.6624150276184082, "learning_rate": 6.610853158538211e-08, "loss": 0.7488, "step": 26888 }, { "epoch": 0.9497679799018937, "grad_norm": 1.9500986337661743, "learning_rate": 6.601585485357442e-08, "loss": 0.7663, "step": 26889 }, { "epoch": 0.9498033017056016, "grad_norm": 1.003523826599121, "learning_rate": 6.592324269682426e-08, "loss": 0.5759, "step": 26890 }, { "epoch": 0.9498386235093095, "grad_norm": 1.884831428527832, "learning_rate": 6.583069511634288e-08, "loss": 0.7845, "step": 26891 }, { "epoch": 0.9498739453130174, "grad_norm": 1.762891411781311, "learning_rate": 6.573821211334152e-08, "loss": 0.7946, "step": 26892 }, { "epoch": 0.9499092671167253, "grad_norm": 1.815258264541626, "learning_rate": 6.564579368903145e-08, "loss": 0.7616, "step": 26893 }, { "epoch": 0.9499445889204332, "grad_norm": 1.8825708627700806, "learning_rate": 6.555343984462115e-08, "loss": 0.7538, "step": 26894 }, { "epoch": 0.9499799107241411, "grad_norm": 1.8565287590026855, "learning_rate": 6.546115058132019e-08, "loss": 0.7409, "step": 26895 }, { "epoch": 0.950015232527849, "grad_norm": 1.5556436777114868, "learning_rate": 6.53689259003365e-08, "loss": 0.7199, "step": 26896 }, { "epoch": 0.9500505543315569, "grad_norm": 2.019594192504883, "learning_rate": 6.527676580287579e-08, "loss": 0.7419, "step": 26897 }, { "epoch": 0.9500858761352649, "grad_norm": 1.5282131433486938, "learning_rate": 6.518467029014541e-08, "loss": 0.7396, "step": 26898 }, { "epoch": 0.9501211979389728, "grad_norm": 1.6549253463745117, "learning_rate": 6.509263936335109e-08, "loss": 0.7578, "step": 26899 }, { "epoch": 0.9501565197426807, "grad_norm": 1.614570140838623, "learning_rate": 6.500067302369628e-08, "loss": 0.7512, "step": 26900 }, { "epoch": 0.9501918415463886, "grad_norm": 1.8445395231246948, "learning_rate": 6.490877127238504e-08, "loss": 0.7753, "step": 26901 }, { "epoch": 0.9502271633500965, "grad_norm": 1.653857707977295, "learning_rate": 6.481693411061974e-08, "loss": 0.773, "step": 26902 }, { "epoch": 0.9502624851538044, "grad_norm": 1.7974599599838257, "learning_rate": 6.472516153960329e-08, "loss": 0.7758, "step": 26903 }, { "epoch": 0.9502978069575123, "grad_norm": 1.8508985042572021, "learning_rate": 6.46334535605353e-08, "loss": 0.7705, "step": 26904 }, { "epoch": 0.9503331287612202, "grad_norm": 1.5652918815612793, "learning_rate": 6.454181017461758e-08, "loss": 0.7663, "step": 26905 }, { "epoch": 0.9503684505649281, "grad_norm": 1.7725633382797241, "learning_rate": 6.445023138304862e-08, "loss": 0.7819, "step": 26906 }, { "epoch": 0.950403772368636, "grad_norm": 1.7546640634536743, "learning_rate": 6.435871718702746e-08, "loss": 0.7633, "step": 26907 }, { "epoch": 0.950439094172344, "grad_norm": 1.8128902912139893, "learning_rate": 6.426726758775148e-08, "loss": 0.7562, "step": 26908 }, { "epoch": 0.9504744159760519, "grad_norm": 1.7561078071594238, "learning_rate": 6.417588258641805e-08, "loss": 0.7453, "step": 26909 }, { "epoch": 0.9505097377797598, "grad_norm": 2.0319831371307373, "learning_rate": 6.408456218422231e-08, "loss": 0.8079, "step": 26910 }, { "epoch": 0.9505450595834676, "grad_norm": 1.8349993228912354, "learning_rate": 6.399330638236001e-08, "loss": 0.7405, "step": 26911 }, { "epoch": 0.9505803813871755, "grad_norm": 1.5933586359024048, "learning_rate": 6.390211518202572e-08, "loss": 0.7091, "step": 26912 }, { "epoch": 0.9506157031908834, "grad_norm": 1.6154570579528809, "learning_rate": 6.381098858441236e-08, "loss": 0.7211, "step": 26913 }, { "epoch": 0.9506510249945913, "grad_norm": 1.650091290473938, "learning_rate": 6.371992659071292e-08, "loss": 0.7346, "step": 26914 }, { "epoch": 0.9506863467982992, "grad_norm": 1.8112660646438599, "learning_rate": 6.362892920211972e-08, "loss": 0.7822, "step": 26915 }, { "epoch": 0.9507216686020071, "grad_norm": 1.764315128326416, "learning_rate": 6.353799641982239e-08, "loss": 0.7562, "step": 26916 }, { "epoch": 0.950756990405715, "grad_norm": 1.5558615922927856, "learning_rate": 6.344712824501221e-08, "loss": 0.7576, "step": 26917 }, { "epoch": 0.950792312209423, "grad_norm": 1.7828682661056519, "learning_rate": 6.33563246788782e-08, "loss": 0.7253, "step": 26918 }, { "epoch": 0.9508276340131309, "grad_norm": 1.8468246459960938, "learning_rate": 6.326558572260833e-08, "loss": 0.7094, "step": 26919 }, { "epoch": 0.9508629558168388, "grad_norm": 1.726055383682251, "learning_rate": 6.317491137739052e-08, "loss": 0.7839, "step": 26920 }, { "epoch": 0.9508982776205467, "grad_norm": 2.4844934940338135, "learning_rate": 6.308430164441159e-08, "loss": 0.7746, "step": 26921 }, { "epoch": 0.9509335994242546, "grad_norm": 1.5942943096160889, "learning_rate": 6.299375652485729e-08, "loss": 0.7639, "step": 26922 }, { "epoch": 0.9509689212279625, "grad_norm": 1.6556808948516846, "learning_rate": 6.29032760199122e-08, "loss": 0.7518, "step": 26923 }, { "epoch": 0.9510042430316704, "grad_norm": 1.9183828830718994, "learning_rate": 6.281286013076148e-08, "loss": 0.7757, "step": 26924 }, { "epoch": 0.9510395648353783, "grad_norm": 1.7299891710281372, "learning_rate": 6.27225088585881e-08, "loss": 0.7469, "step": 26925 }, { "epoch": 0.9510748866390862, "grad_norm": 1.6636568307876587, "learning_rate": 6.263222220457387e-08, "loss": 0.7685, "step": 26926 }, { "epoch": 0.9511102084427941, "grad_norm": 1.6694538593292236, "learning_rate": 6.25420001699012e-08, "loss": 0.7775, "step": 26927 }, { "epoch": 0.9511455302465021, "grad_norm": 1.592095136642456, "learning_rate": 6.245184275575022e-08, "loss": 0.7775, "step": 26928 }, { "epoch": 0.95118085205021, "grad_norm": 1.701378345489502, "learning_rate": 6.236174996330224e-08, "loss": 0.7669, "step": 26929 }, { "epoch": 0.9512161738539179, "grad_norm": 1.964172124862671, "learning_rate": 6.22717217937352e-08, "loss": 0.7492, "step": 26930 }, { "epoch": 0.9512514956576258, "grad_norm": 1.74015474319458, "learning_rate": 6.218175824822703e-08, "loss": 0.7776, "step": 26931 }, { "epoch": 0.9512868174613337, "grad_norm": 1.7824914455413818, "learning_rate": 6.20918593279568e-08, "loss": 0.7731, "step": 26932 }, { "epoch": 0.9513221392650416, "grad_norm": 1.762876272201538, "learning_rate": 6.200202503409969e-08, "loss": 0.7853, "step": 26933 }, { "epoch": 0.9513574610687495, "grad_norm": 1.6755315065383911, "learning_rate": 6.191225536783141e-08, "loss": 0.7142, "step": 26934 }, { "epoch": 0.9513927828724574, "grad_norm": 1.6245636940002441, "learning_rate": 6.182255033032769e-08, "loss": 0.7531, "step": 26935 }, { "epoch": 0.9514281046761653, "grad_norm": 1.7027355432510376, "learning_rate": 6.173290992276259e-08, "loss": 0.7649, "step": 26936 }, { "epoch": 0.9514634264798731, "grad_norm": 1.6548837423324585, "learning_rate": 6.164333414630852e-08, "loss": 0.7565, "step": 26937 }, { "epoch": 0.951498748283581, "grad_norm": 1.7820956707000732, "learning_rate": 6.155382300213786e-08, "loss": 0.7734, "step": 26938 }, { "epoch": 0.951534070087289, "grad_norm": 1.6634576320648193, "learning_rate": 6.146437649142244e-08, "loss": 0.7428, "step": 26939 }, { "epoch": 0.9515693918909969, "grad_norm": 1.621565580368042, "learning_rate": 6.137499461533302e-08, "loss": 0.7462, "step": 26940 }, { "epoch": 0.9516047136947048, "grad_norm": 2.5017151832580566, "learning_rate": 6.128567737503976e-08, "loss": 0.7723, "step": 26941 }, { "epoch": 0.9516400354984127, "grad_norm": 1.8896703720092773, "learning_rate": 6.119642477171061e-08, "loss": 0.7374, "step": 26942 }, { "epoch": 0.9516753573021206, "grad_norm": 1.706292748451233, "learning_rate": 6.110723680651465e-08, "loss": 0.7392, "step": 26943 }, { "epoch": 0.9517106791058285, "grad_norm": 0.8934009075164795, "learning_rate": 6.101811348061815e-08, "loss": 0.5571, "step": 26944 }, { "epoch": 0.9517460009095364, "grad_norm": 1.8163000345230103, "learning_rate": 6.092905479518906e-08, "loss": 0.7695, "step": 26945 }, { "epoch": 0.9517813227132443, "grad_norm": 1.5545508861541748, "learning_rate": 6.084006075139148e-08, "loss": 0.7675, "step": 26946 }, { "epoch": 0.9518166445169522, "grad_norm": 1.7087230682373047, "learning_rate": 6.075113135039057e-08, "loss": 0.7734, "step": 26947 }, { "epoch": 0.9518519663206602, "grad_norm": 1.636002540588379, "learning_rate": 6.066226659335095e-08, "loss": 0.7703, "step": 26948 }, { "epoch": 0.9518872881243681, "grad_norm": 1.7221802473068237, "learning_rate": 6.057346648143447e-08, "loss": 0.7993, "step": 26949 }, { "epoch": 0.951922609928076, "grad_norm": 1.6579678058624268, "learning_rate": 6.04847310158041e-08, "loss": 0.7638, "step": 26950 }, { "epoch": 0.9519579317317839, "grad_norm": 1.7541563510894775, "learning_rate": 6.039606019762168e-08, "loss": 0.742, "step": 26951 }, { "epoch": 0.9519932535354918, "grad_norm": 1.6795343160629272, "learning_rate": 6.030745402804627e-08, "loss": 0.7947, "step": 26952 }, { "epoch": 0.9520285753391997, "grad_norm": 2.0745155811309814, "learning_rate": 6.021891250823863e-08, "loss": 0.7448, "step": 26953 }, { "epoch": 0.9520638971429076, "grad_norm": 1.610620141029358, "learning_rate": 6.013043563935783e-08, "loss": 0.7656, "step": 26954 }, { "epoch": 0.9520992189466155, "grad_norm": 1.6618562936782837, "learning_rate": 6.004202342256071e-08, "loss": 0.7713, "step": 26955 }, { "epoch": 0.9521345407503234, "grad_norm": 1.8030792474746704, "learning_rate": 5.995367585900525e-08, "loss": 0.7709, "step": 26956 }, { "epoch": 0.9521698625540314, "grad_norm": 1.630283236503601, "learning_rate": 5.986539294984716e-08, "loss": 0.737, "step": 26957 }, { "epoch": 0.9522051843577393, "grad_norm": 1.5945128202438354, "learning_rate": 5.977717469624223e-08, "loss": 0.7644, "step": 26958 }, { "epoch": 0.9522405061614472, "grad_norm": 1.6290582418441772, "learning_rate": 5.968902109934505e-08, "loss": 0.7407, "step": 26959 }, { "epoch": 0.9522758279651551, "grad_norm": 1.7750478982925415, "learning_rate": 5.9600932160309155e-08, "loss": 0.7799, "step": 26960 }, { "epoch": 0.952311149768863, "grad_norm": 1.6538026332855225, "learning_rate": 5.951290788028752e-08, "loss": 0.7644, "step": 26961 }, { "epoch": 0.9523464715725709, "grad_norm": 1.491774559020996, "learning_rate": 5.942494826043255e-08, "loss": 0.7394, "step": 26962 }, { "epoch": 0.9523817933762787, "grad_norm": 1.8458623886108398, "learning_rate": 5.933705330189499e-08, "loss": 0.7509, "step": 26963 }, { "epoch": 0.9524171151799866, "grad_norm": 1.619560718536377, "learning_rate": 5.9249223005825587e-08, "loss": 0.7773, "step": 26964 }, { "epoch": 0.9524524369836945, "grad_norm": 0.9306973218917847, "learning_rate": 5.9161457373372864e-08, "loss": 0.5766, "step": 26965 }, { "epoch": 0.9524877587874024, "grad_norm": 2.1546151638031006, "learning_rate": 5.907375640568647e-08, "loss": 0.7972, "step": 26966 }, { "epoch": 0.9525230805911103, "grad_norm": 2.024888038635254, "learning_rate": 5.8986120103914354e-08, "loss": 0.7581, "step": 26967 }, { "epoch": 0.9525584023948183, "grad_norm": 1.8388383388519287, "learning_rate": 5.889854846920284e-08, "loss": 0.7791, "step": 26968 }, { "epoch": 0.9525937241985262, "grad_norm": 1.656777262687683, "learning_rate": 5.881104150269823e-08, "loss": 0.7773, "step": 26969 }, { "epoch": 0.9526290460022341, "grad_norm": 1.8204916715621948, "learning_rate": 5.872359920554627e-08, "loss": 0.7765, "step": 26970 }, { "epoch": 0.952664367805942, "grad_norm": 1.7689045667648315, "learning_rate": 5.8636221578890484e-08, "loss": 0.7517, "step": 26971 }, { "epoch": 0.9526996896096499, "grad_norm": 1.6399344205856323, "learning_rate": 5.8548908623874965e-08, "loss": 0.7644, "step": 26972 }, { "epoch": 0.9527350114133578, "grad_norm": 1.8094227313995361, "learning_rate": 5.8461660341643243e-08, "loss": 0.7365, "step": 26973 }, { "epoch": 0.9527703332170657, "grad_norm": 1.633451223373413, "learning_rate": 5.837447673333552e-08, "loss": 0.7531, "step": 26974 }, { "epoch": 0.9528056550207736, "grad_norm": 1.8737471103668213, "learning_rate": 5.82873578000942e-08, "loss": 0.7574, "step": 26975 }, { "epoch": 0.9528409768244815, "grad_norm": 1.634716272354126, "learning_rate": 5.820030354305839e-08, "loss": 0.7299, "step": 26976 }, { "epoch": 0.9528762986281895, "grad_norm": 1.7409937381744385, "learning_rate": 5.8113313963368836e-08, "loss": 0.7535, "step": 26977 }, { "epoch": 0.9529116204318974, "grad_norm": 1.5888664722442627, "learning_rate": 5.8026389062162955e-08, "loss": 0.7648, "step": 26978 }, { "epoch": 0.9529469422356053, "grad_norm": 1.7338937520980835, "learning_rate": 5.793952884057818e-08, "loss": 0.7601, "step": 26979 }, { "epoch": 0.9529822640393132, "grad_norm": 1.7751914262771606, "learning_rate": 5.785273329975249e-08, "loss": 0.7458, "step": 26980 }, { "epoch": 0.9530175858430211, "grad_norm": 1.635035753250122, "learning_rate": 5.776600244082109e-08, "loss": 0.7408, "step": 26981 }, { "epoch": 0.953052907646729, "grad_norm": 1.7382179498672485, "learning_rate": 5.7679336264919174e-08, "loss": 0.7747, "step": 26982 }, { "epoch": 0.9530882294504369, "grad_norm": 1.817456841468811, "learning_rate": 5.759273477318084e-08, "loss": 0.756, "step": 26983 }, { "epoch": 0.9531235512541448, "grad_norm": 1.5522327423095703, "learning_rate": 5.7506197966740175e-08, "loss": 0.7238, "step": 26984 }, { "epoch": 0.9531588730578527, "grad_norm": 1.652536392211914, "learning_rate": 5.74197258467285e-08, "loss": 0.7606, "step": 26985 }, { "epoch": 0.9531941948615607, "grad_norm": 1.9017505645751953, "learning_rate": 5.73333184142788e-08, "loss": 0.7257, "step": 26986 }, { "epoch": 0.9532295166652686, "grad_norm": 1.5707783699035645, "learning_rate": 5.724697567052184e-08, "loss": 0.7457, "step": 26987 }, { "epoch": 0.9532648384689765, "grad_norm": 1.5899232625961304, "learning_rate": 5.716069761658727e-08, "loss": 0.7743, "step": 26988 }, { "epoch": 0.9533001602726843, "grad_norm": 1.7888538837432861, "learning_rate": 5.7074484253603624e-08, "loss": 0.7725, "step": 26989 }, { "epoch": 0.9533354820763922, "grad_norm": 1.8206206560134888, "learning_rate": 5.6988335582701114e-08, "loss": 0.7355, "step": 26990 }, { "epoch": 0.9533708038801001, "grad_norm": 1.8875752687454224, "learning_rate": 5.690225160500551e-08, "loss": 0.7739, "step": 26991 }, { "epoch": 0.953406125683808, "grad_norm": 1.6206985712051392, "learning_rate": 5.681623232164368e-08, "loss": 0.7391, "step": 26992 }, { "epoch": 0.9534414474875159, "grad_norm": 1.6379048824310303, "learning_rate": 5.6730277733742514e-08, "loss": 0.7692, "step": 26993 }, { "epoch": 0.9534767692912238, "grad_norm": 1.5944085121154785, "learning_rate": 5.664438784242554e-08, "loss": 0.7886, "step": 26994 }, { "epoch": 0.9535120910949317, "grad_norm": 1.8442307710647583, "learning_rate": 5.655856264881743e-08, "loss": 0.7667, "step": 26995 }, { "epoch": 0.9535474128986396, "grad_norm": 1.6955417394638062, "learning_rate": 5.6472802154042274e-08, "loss": 0.7834, "step": 26996 }, { "epoch": 0.9535827347023476, "grad_norm": 1.666049838066101, "learning_rate": 5.63871063592214e-08, "loss": 0.7369, "step": 26997 }, { "epoch": 0.9536180565060555, "grad_norm": 1.977091908454895, "learning_rate": 5.630147526547669e-08, "loss": 0.7585, "step": 26998 }, { "epoch": 0.9536533783097634, "grad_norm": 1.9050803184509277, "learning_rate": 5.621590887392947e-08, "loss": 0.7762, "step": 26999 }, { "epoch": 0.9536887001134713, "grad_norm": 2.0032260417938232, "learning_rate": 5.6130407185698846e-08, "loss": 0.7292, "step": 27000 }, { "epoch": 0.9537240219171792, "grad_norm": 1.6996747255325317, "learning_rate": 5.6044970201903915e-08, "loss": 0.7586, "step": 27001 }, { "epoch": 0.9537593437208871, "grad_norm": 1.5397919416427612, "learning_rate": 5.595959792366323e-08, "loss": 0.7432, "step": 27002 }, { "epoch": 0.953794665524595, "grad_norm": 1.582289695739746, "learning_rate": 5.5874290352093684e-08, "loss": 0.741, "step": 27003 }, { "epoch": 0.9538299873283029, "grad_norm": 1.6014341115951538, "learning_rate": 5.5789047488312154e-08, "loss": 0.7724, "step": 27004 }, { "epoch": 0.9538653091320108, "grad_norm": 1.6905747652053833, "learning_rate": 5.5703869333434415e-08, "loss": 0.7533, "step": 27005 }, { "epoch": 0.9539006309357188, "grad_norm": 1.7555941343307495, "learning_rate": 5.5618755888575124e-08, "loss": 0.7715, "step": 27006 }, { "epoch": 0.9539359527394267, "grad_norm": 1.6448183059692383, "learning_rate": 5.553370715484729e-08, "loss": 0.7552, "step": 27007 }, { "epoch": 0.9539712745431346, "grad_norm": 1.7611995935440063, "learning_rate": 5.544872313336558e-08, "loss": 0.7689, "step": 27008 }, { "epoch": 0.9540065963468425, "grad_norm": 1.7130547761917114, "learning_rate": 5.5363803825241316e-08, "loss": 0.7518, "step": 27009 }, { "epoch": 0.9540419181505504, "grad_norm": 1.7130606174468994, "learning_rate": 5.5278949231586387e-08, "loss": 0.7777, "step": 27010 }, { "epoch": 0.9540772399542583, "grad_norm": 1.7046507596969604, "learning_rate": 5.5194159353511026e-08, "loss": 0.7892, "step": 27011 }, { "epoch": 0.9541125617579662, "grad_norm": 1.7199817895889282, "learning_rate": 5.510943419212433e-08, "loss": 0.7546, "step": 27012 }, { "epoch": 0.9541478835616741, "grad_norm": 1.7243082523345947, "learning_rate": 5.5024773748536543e-08, "loss": 0.7441, "step": 27013 }, { "epoch": 0.954183205365382, "grad_norm": 1.746026873588562, "learning_rate": 5.494017802385454e-08, "loss": 0.7325, "step": 27014 }, { "epoch": 0.9542185271690898, "grad_norm": 1.6405577659606934, "learning_rate": 5.4855647019185774e-08, "loss": 0.7984, "step": 27015 }, { "epoch": 0.9542538489727977, "grad_norm": 1.7589868307113647, "learning_rate": 5.477118073563714e-08, "loss": 0.7618, "step": 27016 }, { "epoch": 0.9542891707765057, "grad_norm": 1.680931806564331, "learning_rate": 5.468677917431331e-08, "loss": 0.7541, "step": 27017 }, { "epoch": 0.9543244925802136, "grad_norm": 1.9240472316741943, "learning_rate": 5.4602442336319504e-08, "loss": 0.7367, "step": 27018 }, { "epoch": 0.9543598143839215, "grad_norm": 0.9107245802879333, "learning_rate": 5.4518170222759845e-08, "loss": 0.5565, "step": 27019 }, { "epoch": 0.9543951361876294, "grad_norm": 1.5717617273330688, "learning_rate": 5.4433962834735674e-08, "loss": 0.7706, "step": 27020 }, { "epoch": 0.9544304579913373, "grad_norm": 2.011863946914673, "learning_rate": 5.4349820173350556e-08, "loss": 0.7596, "step": 27021 }, { "epoch": 0.9544657797950452, "grad_norm": 1.7717970609664917, "learning_rate": 5.426574223970582e-08, "loss": 0.7603, "step": 27022 }, { "epoch": 0.9545011015987531, "grad_norm": 1.7141116857528687, "learning_rate": 5.41817290349006e-08, "loss": 0.781, "step": 27023 }, { "epoch": 0.954536423402461, "grad_norm": 1.9033890962600708, "learning_rate": 5.4097780560035674e-08, "loss": 0.7438, "step": 27024 }, { "epoch": 0.9545717452061689, "grad_norm": 1.73811936378479, "learning_rate": 5.401389681620906e-08, "loss": 0.7492, "step": 27025 }, { "epoch": 0.9546070670098769, "grad_norm": 0.95331871509552, "learning_rate": 5.393007780451876e-08, "loss": 0.6018, "step": 27026 }, { "epoch": 0.9546423888135848, "grad_norm": 1.8865858316421509, "learning_rate": 5.3846323526062225e-08, "loss": 0.7081, "step": 27027 }, { "epoch": 0.9546777106172927, "grad_norm": 1.6978217363357544, "learning_rate": 5.376263398193471e-08, "loss": 0.7797, "step": 27028 }, { "epoch": 0.9547130324210006, "grad_norm": 1.573887586593628, "learning_rate": 5.3679009173232545e-08, "loss": 0.7259, "step": 27029 }, { "epoch": 0.9547483542247085, "grad_norm": 2.267317056655884, "learning_rate": 5.35954491010493e-08, "loss": 0.7634, "step": 27030 }, { "epoch": 0.9547836760284164, "grad_norm": 1.8041850328445435, "learning_rate": 5.3511953766479106e-08, "loss": 0.781, "step": 27031 }, { "epoch": 0.9548189978321243, "grad_norm": 1.675057053565979, "learning_rate": 5.342852317061498e-08, "loss": 0.7849, "step": 27032 }, { "epoch": 0.9548543196358322, "grad_norm": 1.773155689239502, "learning_rate": 5.334515731454826e-08, "loss": 0.7633, "step": 27033 }, { "epoch": 0.9548896414395401, "grad_norm": 1.7833436727523804, "learning_rate": 5.326185619936974e-08, "loss": 0.7329, "step": 27034 }, { "epoch": 0.954924963243248, "grad_norm": 1.725521206855774, "learning_rate": 5.317861982617079e-08, "loss": 0.7657, "step": 27035 }, { "epoch": 0.954960285046956, "grad_norm": 1.780629277229309, "learning_rate": 5.3095448196039956e-08, "loss": 0.7456, "step": 27036 }, { "epoch": 0.9549956068506639, "grad_norm": 1.0137662887573242, "learning_rate": 5.301234131006583e-08, "loss": 0.5801, "step": 27037 }, { "epoch": 0.9550309286543718, "grad_norm": 1.6974003314971924, "learning_rate": 5.292929916933698e-08, "loss": 0.7811, "step": 27038 }, { "epoch": 0.9550662504580797, "grad_norm": 1.7490102052688599, "learning_rate": 5.2846321774938646e-08, "loss": 0.7749, "step": 27039 }, { "epoch": 0.9551015722617876, "grad_norm": 1.8243334293365479, "learning_rate": 5.276340912795774e-08, "loss": 0.7744, "step": 27040 }, { "epoch": 0.9551368940654954, "grad_norm": 1.646327257156372, "learning_rate": 5.268056122948007e-08, "loss": 0.7292, "step": 27041 }, { "epoch": 0.9551722158692033, "grad_norm": 1.8717288970947266, "learning_rate": 5.2597778080588635e-08, "loss": 0.7333, "step": 27042 }, { "epoch": 0.9552075376729112, "grad_norm": 1.686937689781189, "learning_rate": 5.2515059682367585e-08, "loss": 0.7734, "step": 27043 }, { "epoch": 0.9552428594766191, "grad_norm": 1.8427938222885132, "learning_rate": 5.243240603589939e-08, "loss": 0.7598, "step": 27044 }, { "epoch": 0.955278181280327, "grad_norm": 1.6759496927261353, "learning_rate": 5.2349817142265947e-08, "loss": 0.7578, "step": 27045 }, { "epoch": 0.955313503084035, "grad_norm": 1.7399897575378418, "learning_rate": 5.226729300254807e-08, "loss": 0.7629, "step": 27046 }, { "epoch": 0.9553488248877429, "grad_norm": 1.6834344863891602, "learning_rate": 5.2184833617825445e-08, "loss": 0.7211, "step": 27047 }, { "epoch": 0.9553841466914508, "grad_norm": 2.1640737056732178, "learning_rate": 5.210243898917833e-08, "loss": 0.7611, "step": 27048 }, { "epoch": 0.9554194684951587, "grad_norm": 1.7744243144989014, "learning_rate": 5.2020109117683625e-08, "loss": 0.7638, "step": 27049 }, { "epoch": 0.9554547902988666, "grad_norm": 1.7889251708984375, "learning_rate": 5.1937844004419925e-08, "loss": 0.7805, "step": 27050 }, { "epoch": 0.9554901121025745, "grad_norm": 1.6684130430221558, "learning_rate": 5.1855643650463586e-08, "loss": 0.7991, "step": 27051 }, { "epoch": 0.9555254339062824, "grad_norm": 1.7014042139053345, "learning_rate": 5.177350805689041e-08, "loss": 0.7545, "step": 27052 }, { "epoch": 0.9555607557099903, "grad_norm": 1.74889075756073, "learning_rate": 5.16914372247751e-08, "loss": 0.7714, "step": 27053 }, { "epoch": 0.9555960775136982, "grad_norm": 1.7612791061401367, "learning_rate": 5.160943115519235e-08, "loss": 0.7952, "step": 27054 }, { "epoch": 0.9556313993174061, "grad_norm": 1.6894408464431763, "learning_rate": 5.1527489849215185e-08, "loss": 0.7858, "step": 27055 }, { "epoch": 0.9556667211211141, "grad_norm": 1.5973429679870605, "learning_rate": 5.1445613307916086e-08, "loss": 0.7376, "step": 27056 }, { "epoch": 0.955702042924822, "grad_norm": 1.66874361038208, "learning_rate": 5.1363801532366416e-08, "loss": 0.7396, "step": 27057 }, { "epoch": 0.9557373647285299, "grad_norm": 1.8418769836425781, "learning_rate": 5.128205452363755e-08, "loss": 0.7246, "step": 27058 }, { "epoch": 0.9557726865322378, "grad_norm": 1.7715771198272705, "learning_rate": 5.120037228279806e-08, "loss": 0.7673, "step": 27059 }, { "epoch": 0.9558080083359457, "grad_norm": 1.7419294118881226, "learning_rate": 5.111875481091821e-08, "loss": 0.7433, "step": 27060 }, { "epoch": 0.9558433301396536, "grad_norm": 1.7087256908416748, "learning_rate": 5.103720210906604e-08, "loss": 0.7821, "step": 27061 }, { "epoch": 0.9558786519433615, "grad_norm": 1.7604073286056519, "learning_rate": 5.095571417830847e-08, "loss": 0.7708, "step": 27062 }, { "epoch": 0.9559139737470694, "grad_norm": 1.684435248374939, "learning_rate": 5.087429101971242e-08, "loss": 0.7398, "step": 27063 }, { "epoch": 0.9559492955507773, "grad_norm": 1.7034869194030762, "learning_rate": 5.079293263434315e-08, "loss": 0.7661, "step": 27064 }, { "epoch": 0.9559846173544853, "grad_norm": 2.3392574787139893, "learning_rate": 5.071163902326592e-08, "loss": 0.7315, "step": 27065 }, { "epoch": 0.9560199391581932, "grad_norm": 1.7245092391967773, "learning_rate": 5.063041018754378e-08, "loss": 0.7516, "step": 27066 }, { "epoch": 0.956055260961901, "grad_norm": 1.7733981609344482, "learning_rate": 5.054924612824086e-08, "loss": 0.7545, "step": 27067 }, { "epoch": 0.9560905827656089, "grad_norm": 1.7820954322814941, "learning_rate": 5.04681468464191e-08, "loss": 0.7814, "step": 27068 }, { "epoch": 0.9561259045693168, "grad_norm": 1.5775995254516602, "learning_rate": 5.038711234313931e-08, "loss": 0.7219, "step": 27069 }, { "epoch": 0.9561612263730247, "grad_norm": 1.8229883909225464, "learning_rate": 5.0306142619462875e-08, "loss": 0.7841, "step": 27070 }, { "epoch": 0.9561965481767326, "grad_norm": 1.785301685333252, "learning_rate": 5.022523767644949e-08, "loss": 0.7523, "step": 27071 }, { "epoch": 0.9562318699804405, "grad_norm": 1.7026296854019165, "learning_rate": 5.0144397515157206e-08, "loss": 0.7815, "step": 27072 }, { "epoch": 0.9562671917841484, "grad_norm": 1.687389612197876, "learning_rate": 5.006362213664462e-08, "loss": 0.7422, "step": 27073 }, { "epoch": 0.9563025135878563, "grad_norm": 1.6488183736801147, "learning_rate": 4.998291154196977e-08, "loss": 0.7051, "step": 27074 }, { "epoch": 0.9563378353915643, "grad_norm": 1.6843458414077759, "learning_rate": 4.9902265732186815e-08, "loss": 0.7599, "step": 27075 }, { "epoch": 0.9563731571952722, "grad_norm": 2.169663906097412, "learning_rate": 4.9821684708353245e-08, "loss": 0.7808, "step": 27076 }, { "epoch": 0.9564084789989801, "grad_norm": 1.6978317499160767, "learning_rate": 4.974116847152266e-08, "loss": 0.7491, "step": 27077 }, { "epoch": 0.956443800802688, "grad_norm": 1.6247247457504272, "learning_rate": 4.966071702274922e-08, "loss": 0.7427, "step": 27078 }, { "epoch": 0.9564791226063959, "grad_norm": 1.7932859659194946, "learning_rate": 4.958033036308596e-08, "loss": 0.7365, "step": 27079 }, { "epoch": 0.9565144444101038, "grad_norm": 1.9958401918411255, "learning_rate": 4.950000849358427e-08, "loss": 0.7584, "step": 27080 }, { "epoch": 0.9565497662138117, "grad_norm": 1.7912607192993164, "learning_rate": 4.941975141529609e-08, "loss": 0.7973, "step": 27081 }, { "epoch": 0.9565850880175196, "grad_norm": 1.576660394668579, "learning_rate": 4.9339559129271686e-08, "loss": 0.7242, "step": 27082 }, { "epoch": 0.9566204098212275, "grad_norm": 1.9490458965301514, "learning_rate": 4.925943163656077e-08, "loss": 0.7628, "step": 27083 }, { "epoch": 0.9566557316249354, "grad_norm": 1.5729774236679077, "learning_rate": 4.91793689382114e-08, "loss": 0.7245, "step": 27084 }, { "epoch": 0.9566910534286434, "grad_norm": 1.8406894207000732, "learning_rate": 4.9099371035272183e-08, "loss": 0.7769, "step": 27085 }, { "epoch": 0.9567263752323513, "grad_norm": 1.9087581634521484, "learning_rate": 4.901943792878894e-08, "loss": 0.7981, "step": 27086 }, { "epoch": 0.9567616970360592, "grad_norm": 1.9526329040527344, "learning_rate": 4.893956961980917e-08, "loss": 0.7492, "step": 27087 }, { "epoch": 0.9567970188397671, "grad_norm": 1.5698775053024292, "learning_rate": 4.8859766109377596e-08, "loss": 0.753, "step": 27088 }, { "epoch": 0.956832340643475, "grad_norm": 1.83091402053833, "learning_rate": 4.8780027398538375e-08, "loss": 0.7456, "step": 27089 }, { "epoch": 0.9568676624471829, "grad_norm": 3.0648653507232666, "learning_rate": 4.8700353488336235e-08, "loss": 0.7285, "step": 27090 }, { "epoch": 0.9569029842508908, "grad_norm": 2.0084762573242188, "learning_rate": 4.8620744379812565e-08, "loss": 0.7719, "step": 27091 }, { "epoch": 0.9569383060545987, "grad_norm": 1.661742925643921, "learning_rate": 4.854120007400931e-08, "loss": 0.7643, "step": 27092 }, { "epoch": 0.9569736278583065, "grad_norm": 1.83792245388031, "learning_rate": 4.846172057196896e-08, "loss": 0.781, "step": 27093 }, { "epoch": 0.9570089496620144, "grad_norm": 2.0666871070861816, "learning_rate": 4.838230587473014e-08, "loss": 0.7505, "step": 27094 }, { "epoch": 0.9570442714657224, "grad_norm": 1.7443608045578003, "learning_rate": 4.830295598333312e-08, "loss": 0.7629, "step": 27095 }, { "epoch": 0.9570795932694303, "grad_norm": 0.9580636024475098, "learning_rate": 4.822367089881652e-08, "loss": 0.5592, "step": 27096 }, { "epoch": 0.9571149150731382, "grad_norm": 1.7851026058197021, "learning_rate": 4.814445062221729e-08, "loss": 0.7686, "step": 27097 }, { "epoch": 0.9571502368768461, "grad_norm": 1.8700453042984009, "learning_rate": 4.8065295154572924e-08, "loss": 0.7749, "step": 27098 }, { "epoch": 0.957185558680554, "grad_norm": 2.057131290435791, "learning_rate": 4.7986204496919266e-08, "loss": 0.7648, "step": 27099 }, { "epoch": 0.9572208804842619, "grad_norm": 1.7826255559921265, "learning_rate": 4.790717865029104e-08, "loss": 0.7462, "step": 27100 }, { "epoch": 0.9572562022879698, "grad_norm": 1.6618461608886719, "learning_rate": 4.782821761572243e-08, "loss": 0.7717, "step": 27101 }, { "epoch": 0.9572915240916777, "grad_norm": 1.7563422918319702, "learning_rate": 4.7749321394247594e-08, "loss": 0.7322, "step": 27102 }, { "epoch": 0.9573268458953856, "grad_norm": 1.6929973363876343, "learning_rate": 4.767048998689905e-08, "loss": 0.7488, "step": 27103 }, { "epoch": 0.9573621676990935, "grad_norm": 1.9806160926818848, "learning_rate": 4.7591723394707634e-08, "loss": 0.7567, "step": 27104 }, { "epoch": 0.9573974895028015, "grad_norm": 1.7569018602371216, "learning_rate": 4.751302161870475e-08, "loss": 0.744, "step": 27105 }, { "epoch": 0.9574328113065094, "grad_norm": 4.730467796325684, "learning_rate": 4.743438465992067e-08, "loss": 0.777, "step": 27106 }, { "epoch": 0.9574681331102173, "grad_norm": 1.7633172273635864, "learning_rate": 4.735581251938459e-08, "loss": 0.7465, "step": 27107 }, { "epoch": 0.9575034549139252, "grad_norm": 1.8026492595672607, "learning_rate": 4.727730519812401e-08, "loss": 0.7693, "step": 27108 }, { "epoch": 0.9575387767176331, "grad_norm": 1.6139156818389893, "learning_rate": 4.719886269716811e-08, "loss": 0.7801, "step": 27109 }, { "epoch": 0.957574098521341, "grad_norm": 1.8705042600631714, "learning_rate": 4.712048501754163e-08, "loss": 0.7394, "step": 27110 }, { "epoch": 0.9576094203250489, "grad_norm": 2.0960946083068848, "learning_rate": 4.704217216027096e-08, "loss": 0.7369, "step": 27111 }, { "epoch": 0.9576447421287568, "grad_norm": 1.7264916896820068, "learning_rate": 4.696392412638195e-08, "loss": 0.7664, "step": 27112 }, { "epoch": 0.9576800639324647, "grad_norm": 1.6366233825683594, "learning_rate": 4.6885740916897684e-08, "loss": 0.7833, "step": 27113 }, { "epoch": 0.9577153857361727, "grad_norm": 1.8513187170028687, "learning_rate": 4.680762253284177e-08, "loss": 0.777, "step": 27114 }, { "epoch": 0.9577507075398806, "grad_norm": 1.5139604806900024, "learning_rate": 4.672956897523673e-08, "loss": 0.7304, "step": 27115 }, { "epoch": 0.9577860293435885, "grad_norm": 2.0406312942504883, "learning_rate": 4.665158024510397e-08, "loss": 0.7763, "step": 27116 }, { "epoch": 0.9578213511472964, "grad_norm": 1.7627941370010376, "learning_rate": 4.6573656343464336e-08, "loss": 0.7632, "step": 27117 }, { "epoch": 0.9578566729510043, "grad_norm": 1.543691635131836, "learning_rate": 4.649579727133702e-08, "loss": 0.7391, "step": 27118 }, { "epoch": 0.9578919947547121, "grad_norm": 1.6735315322875977, "learning_rate": 4.641800302974231e-08, "loss": 0.7338, "step": 27119 }, { "epoch": 0.95792731655842, "grad_norm": 1.6261576414108276, "learning_rate": 4.6340273619697173e-08, "loss": 0.7678, "step": 27120 }, { "epoch": 0.9579626383621279, "grad_norm": 1.7645115852355957, "learning_rate": 4.626260904221969e-08, "loss": 0.7416, "step": 27121 }, { "epoch": 0.9579979601658358, "grad_norm": 2.0132908821105957, "learning_rate": 4.61850092983257e-08, "loss": 0.7457, "step": 27122 }, { "epoch": 0.9580332819695437, "grad_norm": 1.6456377506256104, "learning_rate": 4.610747438903107e-08, "loss": 0.7533, "step": 27123 }, { "epoch": 0.9580686037732516, "grad_norm": 1.4785159826278687, "learning_rate": 4.603000431535054e-08, "loss": 0.7131, "step": 27124 }, { "epoch": 0.9581039255769596, "grad_norm": 1.6122368574142456, "learning_rate": 4.595259907829774e-08, "loss": 0.7489, "step": 27125 }, { "epoch": 0.9581392473806675, "grad_norm": 1.7508407831192017, "learning_rate": 4.587525867888687e-08, "loss": 0.7525, "step": 27126 }, { "epoch": 0.9581745691843754, "grad_norm": 1.5833660364151, "learning_rate": 4.579798311812877e-08, "loss": 0.7363, "step": 27127 }, { "epoch": 0.9582098909880833, "grad_norm": 1.7141989469528198, "learning_rate": 4.572077239703543e-08, "loss": 0.7365, "step": 27128 }, { "epoch": 0.9582452127917912, "grad_norm": 1.8377357721328735, "learning_rate": 4.564362651661769e-08, "loss": 0.782, "step": 27129 }, { "epoch": 0.9582805345954991, "grad_norm": 1.4448000192642212, "learning_rate": 4.5566545477884194e-08, "loss": 0.7373, "step": 27130 }, { "epoch": 0.958315856399207, "grad_norm": 1.659576416015625, "learning_rate": 4.548952928184469e-08, "loss": 0.722, "step": 27131 }, { "epoch": 0.9583511782029149, "grad_norm": 1.779870629310608, "learning_rate": 4.541257792950726e-08, "loss": 0.774, "step": 27132 }, { "epoch": 0.9583865000066228, "grad_norm": 1.8993749618530273, "learning_rate": 4.5335691421878326e-08, "loss": 0.8005, "step": 27133 }, { "epoch": 0.9584218218103308, "grad_norm": 1.8448739051818848, "learning_rate": 4.5258869759964296e-08, "loss": 0.7637, "step": 27134 }, { "epoch": 0.9584571436140387, "grad_norm": 1.9337254762649536, "learning_rate": 4.518211294477159e-08, "loss": 0.7334, "step": 27135 }, { "epoch": 0.9584924654177466, "grad_norm": 1.8069043159484863, "learning_rate": 4.5105420977303307e-08, "loss": 0.7654, "step": 27136 }, { "epoch": 0.9585277872214545, "grad_norm": 1.8396847248077393, "learning_rate": 4.502879385856362e-08, "loss": 0.7813, "step": 27137 }, { "epoch": 0.9585631090251624, "grad_norm": 1.673044204711914, "learning_rate": 4.4952231589556195e-08, "loss": 0.7638, "step": 27138 }, { "epoch": 0.9585984308288703, "grad_norm": 1.8543676137924194, "learning_rate": 4.4875734171282435e-08, "loss": 0.7685, "step": 27139 }, { "epoch": 0.9586337526325782, "grad_norm": 1.6569474935531616, "learning_rate": 4.4799301604743775e-08, "loss": 0.7682, "step": 27140 }, { "epoch": 0.9586690744362861, "grad_norm": 1.721756935119629, "learning_rate": 4.472293389093996e-08, "loss": 0.7519, "step": 27141 }, { "epoch": 0.958704396239994, "grad_norm": 1.5789366960525513, "learning_rate": 4.46466310308713e-08, "loss": 0.773, "step": 27142 }, { "epoch": 0.958739718043702, "grad_norm": 1.6028646230697632, "learning_rate": 4.457039302553534e-08, "loss": 0.7376, "step": 27143 }, { "epoch": 0.9587750398474099, "grad_norm": 1.652879238128662, "learning_rate": 4.449421987593072e-08, "loss": 0.7302, "step": 27144 }, { "epoch": 0.9588103616511177, "grad_norm": 2.4811556339263916, "learning_rate": 4.441811158305498e-08, "loss": 0.7622, "step": 27145 }, { "epoch": 0.9588456834548256, "grad_norm": 1.611449122428894, "learning_rate": 4.4342068147902874e-08, "loss": 0.7779, "step": 27146 }, { "epoch": 0.9588810052585335, "grad_norm": 1.5525413751602173, "learning_rate": 4.426608957147027e-08, "loss": 0.7139, "step": 27147 }, { "epoch": 0.9589163270622414, "grad_norm": 1.8279762268066406, "learning_rate": 4.419017585475138e-08, "loss": 0.7893, "step": 27148 }, { "epoch": 0.9589516488659493, "grad_norm": 1.7507327795028687, "learning_rate": 4.4114326998739855e-08, "loss": 0.7773, "step": 27149 }, { "epoch": 0.9589869706696572, "grad_norm": 1.8846118450164795, "learning_rate": 4.403854300442878e-08, "loss": 0.7591, "step": 27150 }, { "epoch": 0.9590222924733651, "grad_norm": 1.8034284114837646, "learning_rate": 4.396282387280959e-08, "loss": 0.7747, "step": 27151 }, { "epoch": 0.959057614277073, "grad_norm": 1.634740948677063, "learning_rate": 4.388716960487316e-08, "loss": 0.7434, "step": 27152 }, { "epoch": 0.959092936080781, "grad_norm": 1.6725831031799316, "learning_rate": 4.3811580201609803e-08, "loss": 0.7581, "step": 27153 }, { "epoch": 0.9591282578844889, "grad_norm": 1.703352451324463, "learning_rate": 4.373605566400874e-08, "loss": 0.7737, "step": 27154 }, { "epoch": 0.9591635796881968, "grad_norm": 1.7606533765792847, "learning_rate": 4.3660595993058605e-08, "loss": 0.7391, "step": 27155 }, { "epoch": 0.9591989014919047, "grad_norm": 1.5276867151260376, "learning_rate": 4.3585201189746965e-08, "loss": 0.7673, "step": 27156 }, { "epoch": 0.9592342232956126, "grad_norm": 1.8695378303527832, "learning_rate": 4.3509871255060234e-08, "loss": 0.7773, "step": 27157 }, { "epoch": 0.9592695450993205, "grad_norm": 1.5352863073349, "learning_rate": 4.3434606189985405e-08, "loss": 0.7652, "step": 27158 }, { "epoch": 0.9593048669030284, "grad_norm": 1.7947347164154053, "learning_rate": 4.3359405995505585e-08, "loss": 0.7739, "step": 27159 }, { "epoch": 0.9593401887067363, "grad_norm": 2.8527181148529053, "learning_rate": 4.3284270672606654e-08, "loss": 0.752, "step": 27160 }, { "epoch": 0.9593755105104442, "grad_norm": 1.6749716997146606, "learning_rate": 4.3209200222271707e-08, "loss": 0.7537, "step": 27161 }, { "epoch": 0.9594108323141521, "grad_norm": 2.109370708465576, "learning_rate": 4.313419464548274e-08, "loss": 0.7725, "step": 27162 }, { "epoch": 0.95944615411786, "grad_norm": 1.640152931213379, "learning_rate": 4.305925394322119e-08, "loss": 0.7181, "step": 27163 }, { "epoch": 0.959481475921568, "grad_norm": 2.3638644218444824, "learning_rate": 4.298437811646905e-08, "loss": 0.7543, "step": 27164 }, { "epoch": 0.9595167977252759, "grad_norm": 2.241224765777588, "learning_rate": 4.290956716620498e-08, "loss": 0.7345, "step": 27165 }, { "epoch": 0.9595521195289838, "grad_norm": 1.6887788772583008, "learning_rate": 4.2834821093408754e-08, "loss": 0.7421, "step": 27166 }, { "epoch": 0.9595874413326917, "grad_norm": 1.6000382900238037, "learning_rate": 4.276013989905903e-08, "loss": 0.7373, "step": 27167 }, { "epoch": 0.9596227631363996, "grad_norm": 1.608148455619812, "learning_rate": 4.268552358413225e-08, "loss": 0.7534, "step": 27168 }, { "epoch": 0.9596580849401075, "grad_norm": 1.6463847160339355, "learning_rate": 4.261097214960541e-08, "loss": 0.7419, "step": 27169 }, { "epoch": 0.9596934067438154, "grad_norm": 1.7336660623550415, "learning_rate": 4.25364855964544e-08, "loss": 0.741, "step": 27170 }, { "epoch": 0.9597287285475232, "grad_norm": 1.6063923835754395, "learning_rate": 4.246206392565455e-08, "loss": 0.7749, "step": 27171 }, { "epoch": 0.9597640503512311, "grad_norm": 1.7232967615127563, "learning_rate": 4.2387707138178415e-08, "loss": 0.7798, "step": 27172 }, { "epoch": 0.959799372154939, "grad_norm": 2.2354087829589844, "learning_rate": 4.231341523500077e-08, "loss": 0.7488, "step": 27173 }, { "epoch": 0.959834693958647, "grad_norm": 1.793371558189392, "learning_rate": 4.22391882170925e-08, "loss": 0.7372, "step": 27174 }, { "epoch": 0.9598700157623549, "grad_norm": 1.7383121252059937, "learning_rate": 4.216502608542617e-08, "loss": 0.7797, "step": 27175 }, { "epoch": 0.9599053375660628, "grad_norm": 1.731207251548767, "learning_rate": 4.20909288409721e-08, "loss": 0.7646, "step": 27176 }, { "epoch": 0.9599406593697707, "grad_norm": 1.689797043800354, "learning_rate": 4.201689648470009e-08, "loss": 0.7548, "step": 27177 }, { "epoch": 0.9599759811734786, "grad_norm": 2.1087193489074707, "learning_rate": 4.1942929017578793e-08, "loss": 0.7485, "step": 27178 }, { "epoch": 0.9600113029771865, "grad_norm": 1.7138569355010986, "learning_rate": 4.186902644057633e-08, "loss": 0.7755, "step": 27179 }, { "epoch": 0.9600466247808944, "grad_norm": 1.851306438446045, "learning_rate": 4.179518875465971e-08, "loss": 0.7607, "step": 27180 }, { "epoch": 0.9600819465846023, "grad_norm": 1.6745704412460327, "learning_rate": 4.172141596079593e-08, "loss": 0.7551, "step": 27181 }, { "epoch": 0.9601172683883102, "grad_norm": 1.7741729021072388, "learning_rate": 4.164770805995033e-08, "loss": 0.7683, "step": 27182 }, { "epoch": 0.9601525901920182, "grad_norm": 2.701517343521118, "learning_rate": 4.1574065053087145e-08, "loss": 0.76, "step": 27183 }, { "epoch": 0.9601879119957261, "grad_norm": 2.347404956817627, "learning_rate": 4.150048694117115e-08, "loss": 0.7647, "step": 27184 }, { "epoch": 0.960223233799434, "grad_norm": 1.7298036813735962, "learning_rate": 4.14269737251638e-08, "loss": 0.7795, "step": 27185 }, { "epoch": 0.9602585556031419, "grad_norm": 1.6858277320861816, "learning_rate": 4.135352540602877e-08, "loss": 0.7503, "step": 27186 }, { "epoch": 0.9602938774068498, "grad_norm": 1.6616742610931396, "learning_rate": 4.128014198472641e-08, "loss": 0.7131, "step": 27187 }, { "epoch": 0.9603291992105577, "grad_norm": 1.8107235431671143, "learning_rate": 4.120682346221705e-08, "loss": 0.778, "step": 27188 }, { "epoch": 0.9603645210142656, "grad_norm": 1.7773189544677734, "learning_rate": 4.113356983946104e-08, "loss": 0.7637, "step": 27189 }, { "epoch": 0.9603998428179735, "grad_norm": 1.7025396823883057, "learning_rate": 4.106038111741706e-08, "loss": 0.7238, "step": 27190 }, { "epoch": 0.9604351646216814, "grad_norm": 1.8474286794662476, "learning_rate": 4.0987257297042114e-08, "loss": 0.7883, "step": 27191 }, { "epoch": 0.9604704864253893, "grad_norm": 1.6488679647445679, "learning_rate": 4.091419837929378e-08, "loss": 0.7704, "step": 27192 }, { "epoch": 0.9605058082290973, "grad_norm": 1.6061878204345703, "learning_rate": 4.084120436512851e-08, "loss": 0.7406, "step": 27193 }, { "epoch": 0.9605411300328052, "grad_norm": 1.55356764793396, "learning_rate": 4.076827525550164e-08, "loss": 0.7477, "step": 27194 }, { "epoch": 0.9605764518365131, "grad_norm": 1.9706617593765259, "learning_rate": 4.069541105136632e-08, "loss": 0.7597, "step": 27195 }, { "epoch": 0.960611773640221, "grad_norm": 1.9632182121276855, "learning_rate": 4.062261175367788e-08, "loss": 0.8132, "step": 27196 }, { "epoch": 0.9606470954439288, "grad_norm": 1.9166126251220703, "learning_rate": 4.054987736338834e-08, "loss": 0.7278, "step": 27197 }, { "epoch": 0.9606824172476367, "grad_norm": 1.7193835973739624, "learning_rate": 4.047720788144971e-08, "loss": 0.7859, "step": 27198 }, { "epoch": 0.9607177390513446, "grad_norm": 1.6182307004928589, "learning_rate": 4.0404603308812906e-08, "loss": 0.7641, "step": 27199 }, { "epoch": 0.9607530608550525, "grad_norm": 1.620752215385437, "learning_rate": 4.0332063646428834e-08, "loss": 0.7502, "step": 27200 }, { "epoch": 0.9607883826587604, "grad_norm": 4.063988208770752, "learning_rate": 4.0259588895246174e-08, "loss": 0.7585, "step": 27201 }, { "epoch": 0.9608237044624683, "grad_norm": 1.657739281654358, "learning_rate": 4.018717905621361e-08, "loss": 0.7759, "step": 27202 }, { "epoch": 0.9608590262661763, "grad_norm": 1.7504936456680298, "learning_rate": 4.011483413027872e-08, "loss": 0.7615, "step": 27203 }, { "epoch": 0.9608943480698842, "grad_norm": 1.6625553369522095, "learning_rate": 4.004255411838853e-08, "loss": 0.7658, "step": 27204 }, { "epoch": 0.9609296698735921, "grad_norm": 1.5952937602996826, "learning_rate": 3.99703390214895e-08, "loss": 0.7456, "step": 27205 }, { "epoch": 0.9609649916773, "grad_norm": 1.95535147190094, "learning_rate": 3.989818884052587e-08, "loss": 0.7624, "step": 27206 }, { "epoch": 0.9610003134810079, "grad_norm": 1.9769617319107056, "learning_rate": 3.9826103576442454e-08, "loss": 0.774, "step": 27207 }, { "epoch": 0.9610356352847158, "grad_norm": 2.142343759536743, "learning_rate": 3.975408323018237e-08, "loss": 0.7471, "step": 27208 }, { "epoch": 0.9610709570884237, "grad_norm": 1.762371301651001, "learning_rate": 3.968212780268932e-08, "loss": 0.7458, "step": 27209 }, { "epoch": 0.9611062788921316, "grad_norm": 1.749392032623291, "learning_rate": 3.96102372949031e-08, "loss": 0.7261, "step": 27210 }, { "epoch": 0.9611416006958395, "grad_norm": 1.7121875286102295, "learning_rate": 3.953841170776573e-08, "loss": 0.7714, "step": 27211 }, { "epoch": 0.9611769224995474, "grad_norm": 1.5633786916732788, "learning_rate": 3.9466651042218144e-08, "loss": 0.7523, "step": 27212 }, { "epoch": 0.9612122443032554, "grad_norm": 1.6164854764938354, "learning_rate": 3.93949552991979e-08, "loss": 0.7703, "step": 27213 }, { "epoch": 0.9612475661069633, "grad_norm": 1.9942375421524048, "learning_rate": 3.932332447964371e-08, "loss": 0.8202, "step": 27214 }, { "epoch": 0.9612828879106712, "grad_norm": 1.7273589372634888, "learning_rate": 3.9251758584493704e-08, "loss": 0.7948, "step": 27215 }, { "epoch": 0.9613182097143791, "grad_norm": 1.6139291524887085, "learning_rate": 3.9180257614683806e-08, "loss": 0.7471, "step": 27216 }, { "epoch": 0.961353531518087, "grad_norm": 1.7140823602676392, "learning_rate": 3.910882157115048e-08, "loss": 0.7778, "step": 27217 }, { "epoch": 0.9613888533217949, "grad_norm": 1.7549532651901245, "learning_rate": 3.9037450454827985e-08, "loss": 0.7758, "step": 27218 }, { "epoch": 0.9614241751255028, "grad_norm": 1.6186871528625488, "learning_rate": 3.896614426665168e-08, "loss": 0.7586, "step": 27219 }, { "epoch": 0.9614594969292107, "grad_norm": 1.947858214378357, "learning_rate": 3.8894903007553056e-08, "loss": 0.7647, "step": 27220 }, { "epoch": 0.9614948187329186, "grad_norm": 1.6199053525924683, "learning_rate": 3.8823726678465234e-08, "loss": 0.7687, "step": 27221 }, { "epoch": 0.9615301405366266, "grad_norm": 1.6195039749145508, "learning_rate": 3.875261528032082e-08, "loss": 0.72, "step": 27222 }, { "epoch": 0.9615654623403344, "grad_norm": 1.7166244983673096, "learning_rate": 3.8681568814048495e-08, "loss": 0.7996, "step": 27223 }, { "epoch": 0.9616007841440423, "grad_norm": 1.6702227592468262, "learning_rate": 3.861058728057976e-08, "loss": 0.7695, "step": 27224 }, { "epoch": 0.9616361059477502, "grad_norm": 1.4994151592254639, "learning_rate": 3.8539670680843297e-08, "loss": 0.7505, "step": 27225 }, { "epoch": 0.9616714277514581, "grad_norm": 1.8022557497024536, "learning_rate": 3.846881901576616e-08, "loss": 0.7843, "step": 27226 }, { "epoch": 0.961706749555166, "grad_norm": 1.6411402225494385, "learning_rate": 3.8398032286277033e-08, "loss": 0.7664, "step": 27227 }, { "epoch": 0.9617420713588739, "grad_norm": 1.763121247291565, "learning_rate": 3.832731049330185e-08, "loss": 0.7756, "step": 27228 }, { "epoch": 0.9617773931625818, "grad_norm": 2.839212656021118, "learning_rate": 3.8256653637765426e-08, "loss": 0.7584, "step": 27229 }, { "epoch": 0.9618127149662897, "grad_norm": 1.646344780921936, "learning_rate": 3.818606172059369e-08, "loss": 0.7758, "step": 27230 }, { "epoch": 0.9618480367699976, "grad_norm": 2.154263734817505, "learning_rate": 3.811553474271035e-08, "loss": 0.7361, "step": 27231 }, { "epoch": 0.9618833585737055, "grad_norm": 1.6914150714874268, "learning_rate": 3.804507270503799e-08, "loss": 0.7597, "step": 27232 }, { "epoch": 0.9619186803774135, "grad_norm": 1.6778180599212646, "learning_rate": 3.7974675608498656e-08, "loss": 0.7294, "step": 27233 }, { "epoch": 0.9619540021811214, "grad_norm": 1.5924185514450073, "learning_rate": 3.790434345401439e-08, "loss": 0.73, "step": 27234 }, { "epoch": 0.9619893239848293, "grad_norm": 1.943649411201477, "learning_rate": 3.783407624250557e-08, "loss": 0.7867, "step": 27235 }, { "epoch": 0.9620246457885372, "grad_norm": 1.6375994682312012, "learning_rate": 3.77638739748909e-08, "loss": 0.7484, "step": 27236 }, { "epoch": 0.9620599675922451, "grad_norm": 1.756737470626831, "learning_rate": 3.769373665208964e-08, "loss": 0.7818, "step": 27237 }, { "epoch": 0.962095289395953, "grad_norm": 1.7194797992706299, "learning_rate": 3.762366427502107e-08, "loss": 0.7437, "step": 27238 }, { "epoch": 0.9621306111996609, "grad_norm": 1.6154248714447021, "learning_rate": 3.7553656844599994e-08, "loss": 0.7782, "step": 27239 }, { "epoch": 0.9621659330033688, "grad_norm": 1.6813933849334717, "learning_rate": 3.7483714361744586e-08, "loss": 0.7497, "step": 27240 }, { "epoch": 0.9622012548070767, "grad_norm": 1.4689364433288574, "learning_rate": 3.74138368273691e-08, "loss": 0.747, "step": 27241 }, { "epoch": 0.9622365766107847, "grad_norm": 1.6937592029571533, "learning_rate": 3.7344024242388365e-08, "loss": 0.7605, "step": 27242 }, { "epoch": 0.9622718984144926, "grad_norm": 0.9493674039840698, "learning_rate": 3.72742766077161e-08, "loss": 0.5501, "step": 27243 }, { "epoch": 0.9623072202182005, "grad_norm": 1.7607582807540894, "learning_rate": 3.720459392426545e-08, "loss": 0.7667, "step": 27244 }, { "epoch": 0.9623425420219084, "grad_norm": 2.105419158935547, "learning_rate": 3.713497619294848e-08, "loss": 0.7708, "step": 27245 }, { "epoch": 0.9623778638256163, "grad_norm": 1.8861751556396484, "learning_rate": 3.706542341467556e-08, "loss": 0.7886, "step": 27246 }, { "epoch": 0.9624131856293242, "grad_norm": 1.5786985158920288, "learning_rate": 3.699593559035764e-08, "loss": 0.7554, "step": 27247 }, { "epoch": 0.9624485074330321, "grad_norm": 1.6970142126083374, "learning_rate": 3.692651272090397e-08, "loss": 0.8064, "step": 27248 }, { "epoch": 0.96248382923674, "grad_norm": 1.6924550533294678, "learning_rate": 3.685715480722329e-08, "loss": 0.7392, "step": 27249 }, { "epoch": 0.9625191510404478, "grad_norm": 1.6794462203979492, "learning_rate": 3.678786185022321e-08, "loss": 0.7529, "step": 27250 }, { "epoch": 0.9625544728441557, "grad_norm": 1.6979427337646484, "learning_rate": 3.67186338508102e-08, "loss": 0.7676, "step": 27251 }, { "epoch": 0.9625897946478637, "grad_norm": 1.9598642587661743, "learning_rate": 3.6649470809891344e-08, "loss": 0.7517, "step": 27252 }, { "epoch": 0.9626251164515716, "grad_norm": 2.4648749828338623, "learning_rate": 3.658037272837034e-08, "loss": 0.7488, "step": 27253 }, { "epoch": 0.9626604382552795, "grad_norm": 1.7593022584915161, "learning_rate": 3.651133960715314e-08, "loss": 0.7807, "step": 27254 }, { "epoch": 0.9626957600589874, "grad_norm": 2.7028377056121826, "learning_rate": 3.644237144714235e-08, "loss": 0.7776, "step": 27255 }, { "epoch": 0.9627310818626953, "grad_norm": 1.861865758895874, "learning_rate": 3.63734682492406e-08, "loss": 0.7548, "step": 27256 }, { "epoch": 0.9627664036664032, "grad_norm": 1.7713640928268433, "learning_rate": 3.630463001434936e-08, "loss": 0.7671, "step": 27257 }, { "epoch": 0.9628017254701111, "grad_norm": 1.745484709739685, "learning_rate": 3.6235856743370714e-08, "loss": 0.7406, "step": 27258 }, { "epoch": 0.962837047273819, "grad_norm": 1.6410460472106934, "learning_rate": 3.616714843720393e-08, "loss": 0.7613, "step": 27259 }, { "epoch": 0.9628723690775269, "grad_norm": 2.6773996353149414, "learning_rate": 3.6098505096748284e-08, "loss": 0.7462, "step": 27260 }, { "epoch": 0.9629076908812348, "grad_norm": 1.7088109254837036, "learning_rate": 3.602992672290251e-08, "loss": 0.7736, "step": 27261 }, { "epoch": 0.9629430126849428, "grad_norm": 1.6209388971328735, "learning_rate": 3.5961413316563664e-08, "loss": 0.7334, "step": 27262 }, { "epoch": 0.9629783344886507, "grad_norm": 1.8423253297805786, "learning_rate": 3.589296487862881e-08, "loss": 0.7787, "step": 27263 }, { "epoch": 0.9630136562923586, "grad_norm": 1.7121567726135254, "learning_rate": 3.58245814099939e-08, "loss": 0.7536, "step": 27264 }, { "epoch": 0.9630489780960665, "grad_norm": 1.7365823984146118, "learning_rate": 3.575626291155376e-08, "loss": 0.7697, "step": 27265 }, { "epoch": 0.9630842998997744, "grad_norm": 1.704810380935669, "learning_rate": 3.5688009384202135e-08, "loss": 0.7335, "step": 27266 }, { "epoch": 0.9631196217034823, "grad_norm": 1.7182213068008423, "learning_rate": 3.561982082883275e-08, "loss": 0.7434, "step": 27267 }, { "epoch": 0.9631549435071902, "grad_norm": 1.7104476690292358, "learning_rate": 3.5551697246338224e-08, "loss": 0.7865, "step": 27268 }, { "epoch": 0.9631902653108981, "grad_norm": 1.611454725265503, "learning_rate": 3.548363863760951e-08, "loss": 0.7652, "step": 27269 }, { "epoch": 0.963225587114606, "grad_norm": 1.9285409450531006, "learning_rate": 3.541564500353811e-08, "loss": 0.7767, "step": 27270 }, { "epoch": 0.963260908918314, "grad_norm": 1.6317470073699951, "learning_rate": 3.534771634501333e-08, "loss": 0.7894, "step": 27271 }, { "epoch": 0.9632962307220219, "grad_norm": 1.6455632448196411, "learning_rate": 3.5279852662923886e-08, "loss": 0.7669, "step": 27272 }, { "epoch": 0.9633315525257298, "grad_norm": 0.8943785429000854, "learning_rate": 3.5212053958159076e-08, "loss": 0.5689, "step": 27273 }, { "epoch": 0.9633668743294377, "grad_norm": 1.679158329963684, "learning_rate": 3.514432023160541e-08, "loss": 0.735, "step": 27274 }, { "epoch": 0.9634021961331456, "grad_norm": 1.5403528213500977, "learning_rate": 3.50766514841494e-08, "loss": 0.7336, "step": 27275 }, { "epoch": 0.9634375179368534, "grad_norm": 1.7919244766235352, "learning_rate": 3.5009047716677016e-08, "loss": 0.7556, "step": 27276 }, { "epoch": 0.9634728397405613, "grad_norm": 2.080634832382202, "learning_rate": 3.4941508930073084e-08, "loss": 0.741, "step": 27277 }, { "epoch": 0.9635081615442692, "grad_norm": 1.598852276802063, "learning_rate": 3.487403512522136e-08, "loss": 0.7404, "step": 27278 }, { "epoch": 0.9635434833479771, "grad_norm": 1.7310022115707397, "learning_rate": 3.4806626303004466e-08, "loss": 0.7522, "step": 27279 }, { "epoch": 0.963578805151685, "grad_norm": 1.6397594213485718, "learning_rate": 3.473928246430558e-08, "loss": 0.7487, "step": 27280 }, { "epoch": 0.963614126955393, "grad_norm": 1.5619291067123413, "learning_rate": 3.4672003610005665e-08, "loss": 0.7543, "step": 27281 }, { "epoch": 0.9636494487591009, "grad_norm": 1.6295220851898193, "learning_rate": 3.460478974098458e-08, "loss": 0.7455, "step": 27282 }, { "epoch": 0.9636847705628088, "grad_norm": 1.6170839071273804, "learning_rate": 3.453764085812328e-08, "loss": 0.7867, "step": 27283 }, { "epoch": 0.9637200923665167, "grad_norm": 1.6417728662490845, "learning_rate": 3.4470556962299396e-08, "loss": 0.7446, "step": 27284 }, { "epoch": 0.9637554141702246, "grad_norm": 1.899919867515564, "learning_rate": 3.440353805439167e-08, "loss": 0.8086, "step": 27285 }, { "epoch": 0.9637907359739325, "grad_norm": 1.5159810781478882, "learning_rate": 3.433658413527718e-08, "loss": 0.7366, "step": 27286 }, { "epoch": 0.9638260577776404, "grad_norm": 1.6463357210159302, "learning_rate": 3.42696952058319e-08, "loss": 0.7161, "step": 27287 }, { "epoch": 0.9638613795813483, "grad_norm": 1.7351312637329102, "learning_rate": 3.4202871266931224e-08, "loss": 0.7497, "step": 27288 }, { "epoch": 0.9638967013850562, "grad_norm": 1.8193211555480957, "learning_rate": 3.4136112319450024e-08, "loss": 0.7762, "step": 27289 }, { "epoch": 0.9639320231887641, "grad_norm": 2.0439491271972656, "learning_rate": 3.406941836426203e-08, "loss": 0.7603, "step": 27290 }, { "epoch": 0.963967344992472, "grad_norm": 2.3743886947631836, "learning_rate": 3.400278940223989e-08, "loss": 0.7958, "step": 27291 }, { "epoch": 0.96400266679618, "grad_norm": 1.7075531482696533, "learning_rate": 3.3936225434255676e-08, "loss": 0.756, "step": 27292 }, { "epoch": 0.9640379885998879, "grad_norm": 2.1411468982696533, "learning_rate": 3.3869726461180915e-08, "loss": 0.7487, "step": 27293 }, { "epoch": 0.9640733104035958, "grad_norm": 1.955579400062561, "learning_rate": 3.3803292483885473e-08, "loss": 0.7576, "step": 27294 }, { "epoch": 0.9641086322073037, "grad_norm": 2.070957899093628, "learning_rate": 3.373692350323865e-08, "loss": 0.7932, "step": 27295 }, { "epoch": 0.9641439540110116, "grad_norm": 1.6748756170272827, "learning_rate": 3.367061952010975e-08, "loss": 0.7309, "step": 27296 }, { "epoch": 0.9641792758147195, "grad_norm": 1.8131608963012695, "learning_rate": 3.360438053536641e-08, "loss": 0.7337, "step": 27297 }, { "epoch": 0.9642145976184274, "grad_norm": 1.7164597511291504, "learning_rate": 3.353820654987461e-08, "loss": 0.7475, "step": 27298 }, { "epoch": 0.9642499194221353, "grad_norm": 1.6269047260284424, "learning_rate": 3.347209756450198e-08, "loss": 0.7327, "step": 27299 }, { "epoch": 0.9642852412258432, "grad_norm": 1.6550896167755127, "learning_rate": 3.340605358011284e-08, "loss": 0.7683, "step": 27300 }, { "epoch": 0.9643205630295512, "grad_norm": 1.5884416103363037, "learning_rate": 3.3340074597570935e-08, "loss": 0.7659, "step": 27301 }, { "epoch": 0.964355884833259, "grad_norm": 2.439096450805664, "learning_rate": 3.327416061774114e-08, "loss": 0.7272, "step": 27302 }, { "epoch": 0.9643912066369669, "grad_norm": 1.522233247756958, "learning_rate": 3.3208311641485525e-08, "loss": 0.7271, "step": 27303 }, { "epoch": 0.9644265284406748, "grad_norm": 2.2697503566741943, "learning_rate": 3.314252766966564e-08, "loss": 0.7705, "step": 27304 }, { "epoch": 0.9644618502443827, "grad_norm": 1.7575199604034424, "learning_rate": 3.3076808703143004e-08, "loss": 0.7769, "step": 27305 }, { "epoch": 0.9644971720480906, "grad_norm": 2.118252754211426, "learning_rate": 3.30111547427775e-08, "loss": 0.7994, "step": 27306 }, { "epoch": 0.9645324938517985, "grad_norm": 4.566475868225098, "learning_rate": 3.294556578942787e-08, "loss": 0.7904, "step": 27307 }, { "epoch": 0.9645678156555064, "grad_norm": 1.799187183380127, "learning_rate": 3.288004184395288e-08, "loss": 0.8115, "step": 27308 }, { "epoch": 0.9646031374592143, "grad_norm": 0.9104251861572266, "learning_rate": 3.281458290721018e-08, "loss": 0.5469, "step": 27309 }, { "epoch": 0.9646384592629222, "grad_norm": 1.7132439613342285, "learning_rate": 3.2749188980056856e-08, "loss": 0.7478, "step": 27310 }, { "epoch": 0.9646737810666302, "grad_norm": 1.8177516460418701, "learning_rate": 3.2683860063348335e-08, "loss": 0.7646, "step": 27311 }, { "epoch": 0.9647091028703381, "grad_norm": 1.5898041725158691, "learning_rate": 3.2618596157939497e-08, "loss": 0.7536, "step": 27312 }, { "epoch": 0.964744424674046, "grad_norm": 1.8776766061782837, "learning_rate": 3.255339726468465e-08, "loss": 0.7594, "step": 27313 }, { "epoch": 0.9647797464777539, "grad_norm": 2.0154879093170166, "learning_rate": 3.248826338443756e-08, "loss": 0.7897, "step": 27314 }, { "epoch": 0.9648150682814618, "grad_norm": 2.3412177562713623, "learning_rate": 3.242319451804976e-08, "loss": 0.7514, "step": 27315 }, { "epoch": 0.9648503900851697, "grad_norm": 2.025893211364746, "learning_rate": 3.2358190666373914e-08, "loss": 0.7406, "step": 27316 }, { "epoch": 0.9648857118888776, "grad_norm": 1.6250128746032715, "learning_rate": 3.229325183025989e-08, "loss": 0.7519, "step": 27317 }, { "epoch": 0.9649210336925855, "grad_norm": 1.8951140642166138, "learning_rate": 3.222837801055812e-08, "loss": 0.7762, "step": 27318 }, { "epoch": 0.9649563554962934, "grad_norm": 1.685133934020996, "learning_rate": 3.216356920811736e-08, "loss": 0.784, "step": 27319 }, { "epoch": 0.9649916773000013, "grad_norm": 1.9676858186721802, "learning_rate": 3.2098825423786395e-08, "loss": 0.7687, "step": 27320 }, { "epoch": 0.9650269991037093, "grad_norm": 1.857244849205017, "learning_rate": 3.203414665841176e-08, "loss": 0.7948, "step": 27321 }, { "epoch": 0.9650623209074172, "grad_norm": 1.6447371244430542, "learning_rate": 3.196953291284055e-08, "loss": 0.7792, "step": 27322 }, { "epoch": 0.9650976427111251, "grad_norm": 1.6869665384292603, "learning_rate": 3.190498418791821e-08, "loss": 0.7734, "step": 27323 }, { "epoch": 0.965132964514833, "grad_norm": 1.6493217945098877, "learning_rate": 3.1840500484489055e-08, "loss": 0.7245, "step": 27324 }, { "epoch": 0.9651682863185409, "grad_norm": 1.9902492761611938, "learning_rate": 3.1776081803397974e-08, "loss": 0.7427, "step": 27325 }, { "epoch": 0.9652036081222488, "grad_norm": 1.772879719734192, "learning_rate": 3.171172814548762e-08, "loss": 0.7702, "step": 27326 }, { "epoch": 0.9652389299259567, "grad_norm": 1.7521700859069824, "learning_rate": 3.1647439511599545e-08, "loss": 0.7919, "step": 27327 }, { "epoch": 0.9652742517296645, "grad_norm": 1.8675477504730225, "learning_rate": 3.15832159025764e-08, "loss": 0.7843, "step": 27328 }, { "epoch": 0.9653095735333724, "grad_norm": 1.9196845293045044, "learning_rate": 3.1519057319258637e-08, "loss": 0.8069, "step": 27329 }, { "epoch": 0.9653448953370803, "grad_norm": 1.598227620124817, "learning_rate": 3.145496376248447e-08, "loss": 0.7302, "step": 27330 }, { "epoch": 0.9653802171407883, "grad_norm": 1.5301424264907837, "learning_rate": 3.139093523309433e-08, "loss": 0.7801, "step": 27331 }, { "epoch": 0.9654155389444962, "grad_norm": 0.905311107635498, "learning_rate": 3.1326971731925895e-08, "loss": 0.5724, "step": 27332 }, { "epoch": 0.9654508607482041, "grad_norm": 1.7764294147491455, "learning_rate": 3.1263073259815703e-08, "loss": 0.7668, "step": 27333 }, { "epoch": 0.965486182551912, "grad_norm": 1.7022819519042969, "learning_rate": 3.119923981760031e-08, "loss": 0.7493, "step": 27334 }, { "epoch": 0.9655215043556199, "grad_norm": 1.9699915647506714, "learning_rate": 3.113547140611517e-08, "loss": 0.8053, "step": 27335 }, { "epoch": 0.9655568261593278, "grad_norm": 1.881669044494629, "learning_rate": 3.1071768026195156e-08, "loss": 0.769, "step": 27336 }, { "epoch": 0.9655921479630357, "grad_norm": 1.8634636402130127, "learning_rate": 3.10081296786735e-08, "loss": 0.771, "step": 27337 }, { "epoch": 0.9656274697667436, "grad_norm": 1.8229175806045532, "learning_rate": 3.094455636438398e-08, "loss": 0.7668, "step": 27338 }, { "epoch": 0.9656627915704515, "grad_norm": 1.6477530002593994, "learning_rate": 3.088104808415759e-08, "loss": 0.7882, "step": 27339 }, { "epoch": 0.9656981133741595, "grad_norm": 1.6001062393188477, "learning_rate": 3.08176048388259e-08, "loss": 0.7543, "step": 27340 }, { "epoch": 0.9657334351778674, "grad_norm": 1.7189223766326904, "learning_rate": 3.075422662921934e-08, "loss": 0.7886, "step": 27341 }, { "epoch": 0.9657687569815753, "grad_norm": 1.5618817806243896, "learning_rate": 3.0690913456167816e-08, "loss": 0.7555, "step": 27342 }, { "epoch": 0.9658040787852832, "grad_norm": 1.9085122346878052, "learning_rate": 3.062766532049899e-08, "loss": 0.7352, "step": 27343 }, { "epoch": 0.9658394005889911, "grad_norm": 1.6229504346847534, "learning_rate": 3.056448222304109e-08, "loss": 0.7719, "step": 27344 }, { "epoch": 0.965874722392699, "grad_norm": 1.8778737783432007, "learning_rate": 3.050136416462124e-08, "loss": 0.7622, "step": 27345 }, { "epoch": 0.9659100441964069, "grad_norm": 2.1316792964935303, "learning_rate": 3.043831114606543e-08, "loss": 0.7443, "step": 27346 }, { "epoch": 0.9659453660001148, "grad_norm": 1.656997561454773, "learning_rate": 3.037532316819858e-08, "loss": 0.7666, "step": 27347 }, { "epoch": 0.9659806878038227, "grad_norm": 1.797372817993164, "learning_rate": 3.031240023184556e-08, "loss": 0.7585, "step": 27348 }, { "epoch": 0.9660160096075306, "grad_norm": 1.6161034107208252, "learning_rate": 3.024954233782962e-08, "loss": 0.7515, "step": 27349 }, { "epoch": 0.9660513314112386, "grad_norm": 1.6292500495910645, "learning_rate": 3.018674948697342e-08, "loss": 0.7537, "step": 27350 }, { "epoch": 0.9660866532149465, "grad_norm": 1.7131166458129883, "learning_rate": 3.0124021680099094e-08, "loss": 0.7497, "step": 27351 }, { "epoch": 0.9661219750186544, "grad_norm": 1.6744728088378906, "learning_rate": 3.0061358918026526e-08, "loss": 0.7728, "step": 27352 }, { "epoch": 0.9661572968223623, "grad_norm": 1.6308128833770752, "learning_rate": 2.99987612015773e-08, "loss": 0.7761, "step": 27353 }, { "epoch": 0.9661926186260701, "grad_norm": 1.5893614292144775, "learning_rate": 2.993622853157019e-08, "loss": 0.7386, "step": 27354 }, { "epoch": 0.966227940429778, "grad_norm": 1.5723925828933716, "learning_rate": 2.987376090882288e-08, "loss": 0.7409, "step": 27355 }, { "epoch": 0.9662632622334859, "grad_norm": 1.840559720993042, "learning_rate": 2.981135833415361e-08, "loss": 0.7849, "step": 27356 }, { "epoch": 0.9662985840371938, "grad_norm": 1.7748432159423828, "learning_rate": 2.9749020808379492e-08, "loss": 0.7414, "step": 27357 }, { "epoch": 0.9663339058409017, "grad_norm": 1.680274486541748, "learning_rate": 2.968674833231544e-08, "loss": 0.7523, "step": 27358 }, { "epoch": 0.9663692276446096, "grad_norm": 1.9143569469451904, "learning_rate": 2.9624540906776912e-08, "loss": 0.7821, "step": 27359 }, { "epoch": 0.9664045494483176, "grad_norm": 1.5666747093200684, "learning_rate": 2.9562398532578806e-08, "loss": 0.7381, "step": 27360 }, { "epoch": 0.9664398712520255, "grad_norm": 1.5051344633102417, "learning_rate": 2.9500321210532702e-08, "loss": 0.7635, "step": 27361 }, { "epoch": 0.9664751930557334, "grad_norm": 1.7731088399887085, "learning_rate": 2.9438308941452386e-08, "loss": 0.7357, "step": 27362 }, { "epoch": 0.9665105148594413, "grad_norm": 2.7868266105651855, "learning_rate": 2.9376361726149438e-08, "loss": 0.773, "step": 27363 }, { "epoch": 0.9665458366631492, "grad_norm": 1.760583758354187, "learning_rate": 2.931447956543376e-08, "loss": 0.7937, "step": 27364 }, { "epoch": 0.9665811584668571, "grad_norm": 1.8215845823287964, "learning_rate": 2.9252662460116376e-08, "loss": 0.746, "step": 27365 }, { "epoch": 0.966616480270565, "grad_norm": 1.7405074834823608, "learning_rate": 2.9190910411004967e-08, "loss": 0.7686, "step": 27366 }, { "epoch": 0.9666518020742729, "grad_norm": 1.7592308521270752, "learning_rate": 2.9129223418908893e-08, "loss": 0.7763, "step": 27367 }, { "epoch": 0.9666871238779808, "grad_norm": 1.660848617553711, "learning_rate": 2.906760148463528e-08, "loss": 0.731, "step": 27368 }, { "epoch": 0.9667224456816887, "grad_norm": 1.6328065395355225, "learning_rate": 2.90060446089907e-08, "loss": 0.7341, "step": 27369 }, { "epoch": 0.9667577674853967, "grad_norm": 1.6932960748672485, "learning_rate": 2.8944552792780078e-08, "loss": 0.7794, "step": 27370 }, { "epoch": 0.9667930892891046, "grad_norm": 1.5337965488433838, "learning_rate": 2.8883126036808874e-08, "loss": 0.7446, "step": 27371 }, { "epoch": 0.9668284110928125, "grad_norm": 1.6184104681015015, "learning_rate": 2.8821764341880887e-08, "loss": 0.7488, "step": 27372 }, { "epoch": 0.9668637328965204, "grad_norm": 1.6479417085647583, "learning_rate": 2.8760467708798812e-08, "loss": 0.7585, "step": 27373 }, { "epoch": 0.9668990547002283, "grad_norm": 1.6098854541778564, "learning_rate": 2.8699236138365893e-08, "loss": 0.7458, "step": 27374 }, { "epoch": 0.9669343765039362, "grad_norm": 2.243952989578247, "learning_rate": 2.8638069631382048e-08, "loss": 0.7383, "step": 27375 }, { "epoch": 0.9669696983076441, "grad_norm": 5.3230671882629395, "learning_rate": 2.8576968188649413e-08, "loss": 0.7975, "step": 27376 }, { "epoch": 0.967005020111352, "grad_norm": 1.6902782917022705, "learning_rate": 2.8515931810966235e-08, "loss": 0.7564, "step": 27377 }, { "epoch": 0.9670403419150599, "grad_norm": 1.7523226737976074, "learning_rate": 2.8454960499132434e-08, "loss": 0.7714, "step": 27378 }, { "epoch": 0.9670756637187679, "grad_norm": 1.689665675163269, "learning_rate": 2.8394054253945702e-08, "loss": 0.7782, "step": 27379 }, { "epoch": 0.9671109855224757, "grad_norm": 1.758448600769043, "learning_rate": 2.8333213076202626e-08, "loss": 0.7416, "step": 27380 }, { "epoch": 0.9671463073261836, "grad_norm": 1.745429515838623, "learning_rate": 2.8272436966700346e-08, "loss": 0.721, "step": 27381 }, { "epoch": 0.9671816291298915, "grad_norm": 2.712237596511841, "learning_rate": 2.8211725926233224e-08, "loss": 0.7966, "step": 27382 }, { "epoch": 0.9672169509335994, "grad_norm": 1.8199290037155151, "learning_rate": 2.8151079955596737e-08, "loss": 0.7669, "step": 27383 }, { "epoch": 0.9672522727373073, "grad_norm": 2.0128235816955566, "learning_rate": 2.8090499055584697e-08, "loss": 0.7984, "step": 27384 }, { "epoch": 0.9672875945410152, "grad_norm": 1.8267229795455933, "learning_rate": 2.8029983226989244e-08, "loss": 0.7557, "step": 27385 }, { "epoch": 0.9673229163447231, "grad_norm": 2.022778034210205, "learning_rate": 2.7969532470602524e-08, "loss": 0.7703, "step": 27386 }, { "epoch": 0.967358238148431, "grad_norm": 1.6639808416366577, "learning_rate": 2.7909146787216125e-08, "loss": 0.7431, "step": 27387 }, { "epoch": 0.9673935599521389, "grad_norm": 1.8640995025634766, "learning_rate": 2.784882617761997e-08, "loss": 0.7696, "step": 27388 }, { "epoch": 0.9674288817558468, "grad_norm": 1.7510592937469482, "learning_rate": 2.7788570642603984e-08, "loss": 0.7566, "step": 27389 }, { "epoch": 0.9674642035595548, "grad_norm": 1.8109124898910522, "learning_rate": 2.7728380182956427e-08, "loss": 0.7697, "step": 27390 }, { "epoch": 0.9674995253632627, "grad_norm": 1.9028598070144653, "learning_rate": 2.7668254799465e-08, "loss": 0.7489, "step": 27391 }, { "epoch": 0.9675348471669706, "grad_norm": 1.7495323419570923, "learning_rate": 2.760819449291685e-08, "loss": 0.755, "step": 27392 }, { "epoch": 0.9675701689706785, "grad_norm": 1.683282732963562, "learning_rate": 2.7548199264098573e-08, "loss": 0.7724, "step": 27393 }, { "epoch": 0.9676054907743864, "grad_norm": 1.7662636041641235, "learning_rate": 2.7488269113793987e-08, "loss": 0.7567, "step": 27394 }, { "epoch": 0.9676408125780943, "grad_norm": 1.737586498260498, "learning_rate": 2.7428404042788014e-08, "loss": 0.7682, "step": 27395 }, { "epoch": 0.9676761343818022, "grad_norm": 1.7616934776306152, "learning_rate": 2.7368604051865033e-08, "loss": 0.7226, "step": 27396 }, { "epoch": 0.9677114561855101, "grad_norm": 0.9387238025665283, "learning_rate": 2.7308869141806637e-08, "loss": 0.5398, "step": 27397 }, { "epoch": 0.967746777989218, "grad_norm": 1.5734999179840088, "learning_rate": 2.724919931339498e-08, "loss": 0.7558, "step": 27398 }, { "epoch": 0.967782099792926, "grad_norm": 2.1384687423706055, "learning_rate": 2.7189594567411103e-08, "loss": 0.7934, "step": 27399 }, { "epoch": 0.9678174215966339, "grad_norm": 1.6189076900482178, "learning_rate": 2.7130054904635495e-08, "loss": 0.7572, "step": 27400 }, { "epoch": 0.9678527434003418, "grad_norm": 1.7525008916854858, "learning_rate": 2.7070580325846418e-08, "loss": 0.7424, "step": 27401 }, { "epoch": 0.9678880652040497, "grad_norm": 2.018564224243164, "learning_rate": 2.7011170831822698e-08, "loss": 0.7652, "step": 27402 }, { "epoch": 0.9679233870077576, "grad_norm": 1.676767349243164, "learning_rate": 2.6951826423342044e-08, "loss": 0.7594, "step": 27403 }, { "epoch": 0.9679587088114655, "grad_norm": 1.5439527034759521, "learning_rate": 2.6892547101181056e-08, "loss": 0.7333, "step": 27404 }, { "epoch": 0.9679940306151734, "grad_norm": 1.8820109367370605, "learning_rate": 2.6833332866115226e-08, "loss": 0.7631, "step": 27405 }, { "epoch": 0.9680293524188812, "grad_norm": 1.6855418682098389, "learning_rate": 2.6774183718920598e-08, "loss": 0.7704, "step": 27406 }, { "epoch": 0.9680646742225891, "grad_norm": 1.5923211574554443, "learning_rate": 2.6715099660370447e-08, "loss": 0.7513, "step": 27407 }, { "epoch": 0.968099996026297, "grad_norm": 1.7775599956512451, "learning_rate": 2.6656080691237485e-08, "loss": 0.7288, "step": 27408 }, { "epoch": 0.968135317830005, "grad_norm": 1.661497950553894, "learning_rate": 2.6597126812295537e-08, "loss": 0.7902, "step": 27409 }, { "epoch": 0.9681706396337129, "grad_norm": 1.7896778583526611, "learning_rate": 2.6538238024315655e-08, "loss": 0.7823, "step": 27410 }, { "epoch": 0.9682059614374208, "grad_norm": 1.7016880512237549, "learning_rate": 2.6479414328067777e-08, "loss": 0.7375, "step": 27411 }, { "epoch": 0.9682412832411287, "grad_norm": 1.7902618646621704, "learning_rate": 2.642065572432295e-08, "loss": 0.7441, "step": 27412 }, { "epoch": 0.9682766050448366, "grad_norm": 1.7147916555404663, "learning_rate": 2.6361962213849458e-08, "loss": 0.7722, "step": 27413 }, { "epoch": 0.9683119268485445, "grad_norm": 1.7205394506454468, "learning_rate": 2.6303333797415565e-08, "loss": 0.7713, "step": 27414 }, { "epoch": 0.9683472486522524, "grad_norm": 1.953057050704956, "learning_rate": 2.624477047578844e-08, "loss": 0.7537, "step": 27415 }, { "epoch": 0.9683825704559603, "grad_norm": 1.6765655279159546, "learning_rate": 2.6186272249735245e-08, "loss": 0.7865, "step": 27416 }, { "epoch": 0.9684178922596682, "grad_norm": 1.6187665462493896, "learning_rate": 2.6127839120020925e-08, "loss": 0.7567, "step": 27417 }, { "epoch": 0.9684532140633761, "grad_norm": 1.6740779876708984, "learning_rate": 2.6069471087410427e-08, "loss": 0.7501, "step": 27418 }, { "epoch": 0.968488535867084, "grad_norm": 1.7713111639022827, "learning_rate": 2.601116815266813e-08, "loss": 0.7381, "step": 27419 }, { "epoch": 0.968523857670792, "grad_norm": 1.6543337106704712, "learning_rate": 2.5952930316556212e-08, "loss": 0.7643, "step": 27420 }, { "epoch": 0.9685591794744999, "grad_norm": 2.767990827560425, "learning_rate": 2.5894757579837393e-08, "loss": 0.7761, "step": 27421 }, { "epoch": 0.9685945012782078, "grad_norm": 1.8314460515975952, "learning_rate": 2.5836649943273283e-08, "loss": 0.8041, "step": 27422 }, { "epoch": 0.9686298230819157, "grad_norm": 1.651987910270691, "learning_rate": 2.577860740762328e-08, "loss": 0.7218, "step": 27423 }, { "epoch": 0.9686651448856236, "grad_norm": 1.966551661491394, "learning_rate": 2.5720629973648438e-08, "loss": 0.7638, "step": 27424 }, { "epoch": 0.9687004666893315, "grad_norm": 1.8224611282348633, "learning_rate": 2.5662717642106483e-08, "loss": 0.7541, "step": 27425 }, { "epoch": 0.9687357884930394, "grad_norm": 1.7697950601577759, "learning_rate": 2.5604870413756257e-08, "loss": 0.7789, "step": 27426 }, { "epoch": 0.9687711102967473, "grad_norm": 1.9396588802337646, "learning_rate": 2.554708828935437e-08, "loss": 0.7333, "step": 27427 }, { "epoch": 0.9688064321004553, "grad_norm": 1.6798468828201294, "learning_rate": 2.5489371269656893e-08, "loss": 0.7725, "step": 27428 }, { "epoch": 0.9688417539041632, "grad_norm": 1.9164135456085205, "learning_rate": 2.543171935541988e-08, "loss": 0.7975, "step": 27429 }, { "epoch": 0.9688770757078711, "grad_norm": 1.572948694229126, "learning_rate": 2.5374132547396622e-08, "loss": 0.7179, "step": 27430 }, { "epoch": 0.968912397511579, "grad_norm": 1.6175330877304077, "learning_rate": 2.5316610846342072e-08, "loss": 0.7737, "step": 27431 }, { "epoch": 0.9689477193152868, "grad_norm": 1.6196963787078857, "learning_rate": 2.5259154253008954e-08, "loss": 0.7562, "step": 27432 }, { "epoch": 0.9689830411189947, "grad_norm": 2.048302412033081, "learning_rate": 2.5201762768148342e-08, "loss": 0.7588, "step": 27433 }, { "epoch": 0.9690183629227026, "grad_norm": 1.8243930339813232, "learning_rate": 2.5144436392511852e-08, "loss": 0.7553, "step": 27434 }, { "epoch": 0.9690536847264105, "grad_norm": 1.650734305381775, "learning_rate": 2.5087175126849994e-08, "loss": 0.747, "step": 27435 }, { "epoch": 0.9690890065301184, "grad_norm": 2.277892589569092, "learning_rate": 2.5029978971912172e-08, "loss": 0.7378, "step": 27436 }, { "epoch": 0.9691243283338263, "grad_norm": 1.5607963800430298, "learning_rate": 2.4972847928446676e-08, "loss": 0.746, "step": 27437 }, { "epoch": 0.9691596501375342, "grad_norm": 0.9425302147865295, "learning_rate": 2.4915781997200685e-08, "loss": 0.5723, "step": 27438 }, { "epoch": 0.9691949719412422, "grad_norm": 1.7528632879257202, "learning_rate": 2.4858781178922486e-08, "loss": 0.7387, "step": 27439 }, { "epoch": 0.9692302937449501, "grad_norm": 1.7466715574264526, "learning_rate": 2.4801845474357045e-08, "loss": 0.7967, "step": 27440 }, { "epoch": 0.969265615548658, "grad_norm": 2.0017642974853516, "learning_rate": 2.4744974884249873e-08, "loss": 0.7512, "step": 27441 }, { "epoch": 0.9693009373523659, "grad_norm": 1.5678787231445312, "learning_rate": 2.4688169409345374e-08, "loss": 0.7512, "step": 27442 }, { "epoch": 0.9693362591560738, "grad_norm": 1.7664477825164795, "learning_rate": 2.4631429050386845e-08, "loss": 0.7738, "step": 27443 }, { "epoch": 0.9693715809597817, "grad_norm": 1.8330414295196533, "learning_rate": 2.457475380811647e-08, "loss": 0.7601, "step": 27444 }, { "epoch": 0.9694069027634896, "grad_norm": 1.6511988639831543, "learning_rate": 2.4518143683276985e-08, "loss": 0.7265, "step": 27445 }, { "epoch": 0.9694422245671975, "grad_norm": 1.8948640823364258, "learning_rate": 2.446159867660891e-08, "loss": 0.7582, "step": 27446 }, { "epoch": 0.9694775463709054, "grad_norm": 1.6321121454238892, "learning_rate": 2.4405118788851657e-08, "loss": 0.7325, "step": 27447 }, { "epoch": 0.9695128681746134, "grad_norm": 1.7693551778793335, "learning_rate": 2.4348704020745184e-08, "loss": 0.7484, "step": 27448 }, { "epoch": 0.9695481899783213, "grad_norm": 1.5962822437286377, "learning_rate": 2.429235437302724e-08, "loss": 0.7896, "step": 27449 }, { "epoch": 0.9695835117820292, "grad_norm": 1.0755219459533691, "learning_rate": 2.4236069846435562e-08, "loss": 0.5676, "step": 27450 }, { "epoch": 0.9696188335857371, "grad_norm": 1.7285497188568115, "learning_rate": 2.417985044170734e-08, "loss": 0.7523, "step": 27451 }, { "epoch": 0.969654155389445, "grad_norm": 1.666253685951233, "learning_rate": 2.4123696159577547e-08, "loss": 0.777, "step": 27452 }, { "epoch": 0.9696894771931529, "grad_norm": 2.912522554397583, "learning_rate": 2.40676070007817e-08, "loss": 0.7954, "step": 27453 }, { "epoch": 0.9697247989968608, "grad_norm": 2.0039234161376953, "learning_rate": 2.4011582966053103e-08, "loss": 0.7733, "step": 27454 }, { "epoch": 0.9697601208005687, "grad_norm": 1.7071359157562256, "learning_rate": 2.395562405612617e-08, "loss": 0.769, "step": 27455 }, { "epoch": 0.9697954426042766, "grad_norm": 1.7625112533569336, "learning_rate": 2.389973027173198e-08, "loss": 0.7595, "step": 27456 }, { "epoch": 0.9698307644079845, "grad_norm": 1.5917611122131348, "learning_rate": 2.3843901613603283e-08, "loss": 0.7205, "step": 27457 }, { "epoch": 0.9698660862116923, "grad_norm": 1.8425723314285278, "learning_rate": 2.3788138082470048e-08, "loss": 0.7751, "step": 27458 }, { "epoch": 0.9699014080154003, "grad_norm": 1.5743827819824219, "learning_rate": 2.3732439679062245e-08, "loss": 0.7436, "step": 27459 }, { "epoch": 0.9699367298191082, "grad_norm": 1.6587715148925781, "learning_rate": 2.3676806404108744e-08, "loss": 0.7423, "step": 27460 }, { "epoch": 0.9699720516228161, "grad_norm": 1.7326257228851318, "learning_rate": 2.3621238258337843e-08, "loss": 0.7794, "step": 27461 }, { "epoch": 0.970007373426524, "grad_norm": 1.6056787967681885, "learning_rate": 2.3565735242476183e-08, "loss": 0.7334, "step": 27462 }, { "epoch": 0.9700426952302319, "grad_norm": 1.7459421157836914, "learning_rate": 2.351029735725152e-08, "loss": 0.7712, "step": 27463 }, { "epoch": 0.9700780170339398, "grad_norm": 1.6373738050460815, "learning_rate": 2.3454924603388274e-08, "loss": 0.7687, "step": 27464 }, { "epoch": 0.9701133388376477, "grad_norm": 1.6471257209777832, "learning_rate": 2.3399616981611416e-08, "loss": 0.7406, "step": 27465 }, { "epoch": 0.9701486606413556, "grad_norm": 1.9601141214370728, "learning_rate": 2.3344374492644818e-08, "loss": 0.7617, "step": 27466 }, { "epoch": 0.9701839824450635, "grad_norm": 1.6866079568862915, "learning_rate": 2.3289197137211785e-08, "loss": 0.7574, "step": 27467 }, { "epoch": 0.9702193042487715, "grad_norm": 1.7775999307632446, "learning_rate": 2.323408491603396e-08, "loss": 0.7786, "step": 27468 }, { "epoch": 0.9702546260524794, "grad_norm": 1.7801603078842163, "learning_rate": 2.3179037829832994e-08, "loss": 0.7764, "step": 27469 }, { "epoch": 0.9702899478561873, "grad_norm": 1.836440086364746, "learning_rate": 2.312405587932942e-08, "loss": 0.7524, "step": 27470 }, { "epoch": 0.9703252696598952, "grad_norm": 1.610908031463623, "learning_rate": 2.3069139065242664e-08, "loss": 0.7646, "step": 27471 }, { "epoch": 0.9703605914636031, "grad_norm": 1.6997950077056885, "learning_rate": 2.3014287388291036e-08, "loss": 0.7565, "step": 27472 }, { "epoch": 0.970395913267311, "grad_norm": 1.6803078651428223, "learning_rate": 2.2959500849193405e-08, "loss": 0.7466, "step": 27473 }, { "epoch": 0.9704312350710189, "grad_norm": 1.6757887601852417, "learning_rate": 2.290477944866587e-08, "loss": 0.7644, "step": 27474 }, { "epoch": 0.9704665568747268, "grad_norm": 1.5506094694137573, "learning_rate": 2.2850123187425077e-08, "loss": 0.7238, "step": 27475 }, { "epoch": 0.9705018786784347, "grad_norm": 1.6753370761871338, "learning_rate": 2.279553206618601e-08, "loss": 0.7454, "step": 27476 }, { "epoch": 0.9705372004821426, "grad_norm": 1.6301015615463257, "learning_rate": 2.274100608566365e-08, "loss": 0.732, "step": 27477 }, { "epoch": 0.9705725222858506, "grad_norm": 1.7255628108978271, "learning_rate": 2.2686545246571325e-08, "loss": 0.76, "step": 27478 }, { "epoch": 0.9706078440895585, "grad_norm": 1.6723824739456177, "learning_rate": 2.2632149549621784e-08, "loss": 0.7905, "step": 27479 }, { "epoch": 0.9706431658932664, "grad_norm": 1.6111657619476318, "learning_rate": 2.25778189955278e-08, "loss": 0.7298, "step": 27480 }, { "epoch": 0.9706784876969743, "grad_norm": 1.6282567977905273, "learning_rate": 2.2523553584998802e-08, "loss": 0.7339, "step": 27481 }, { "epoch": 0.9707138095006822, "grad_norm": 1.7419061660766602, "learning_rate": 2.246935331874589e-08, "loss": 0.7675, "step": 27482 }, { "epoch": 0.9707491313043901, "grad_norm": 2.081778049468994, "learning_rate": 2.2415218197478495e-08, "loss": 0.8127, "step": 27483 }, { "epoch": 0.9707844531080979, "grad_norm": 1.6920846700668335, "learning_rate": 2.2361148221905495e-08, "loss": 0.7507, "step": 27484 }, { "epoch": 0.9708197749118058, "grad_norm": 1.6494868993759155, "learning_rate": 2.2307143392733544e-08, "loss": 0.745, "step": 27485 }, { "epoch": 0.9708550967155137, "grad_norm": 0.8440555930137634, "learning_rate": 2.2253203710670413e-08, "loss": 0.5592, "step": 27486 }, { "epoch": 0.9708904185192216, "grad_norm": 2.408174753189087, "learning_rate": 2.2199329176421648e-08, "loss": 0.7541, "step": 27487 }, { "epoch": 0.9709257403229296, "grad_norm": 1.801511287689209, "learning_rate": 2.2145519790691685e-08, "loss": 0.7554, "step": 27488 }, { "epoch": 0.9709610621266375, "grad_norm": 1.7249561548233032, "learning_rate": 2.2091775554186067e-08, "loss": 0.7549, "step": 27489 }, { "epoch": 0.9709963839303454, "grad_norm": 2.1664528846740723, "learning_rate": 2.2038096467607572e-08, "loss": 0.7517, "step": 27490 }, { "epoch": 0.9710317057340533, "grad_norm": 1.6593658924102783, "learning_rate": 2.1984482531658412e-08, "loss": 0.7423, "step": 27491 }, { "epoch": 0.9710670275377612, "grad_norm": 1.7393012046813965, "learning_rate": 2.1930933747040805e-08, "loss": 0.7717, "step": 27492 }, { "epoch": 0.9711023493414691, "grad_norm": 1.8340941667556763, "learning_rate": 2.1877450114454747e-08, "loss": 0.765, "step": 27493 }, { "epoch": 0.971137671145177, "grad_norm": 1.5878187417984009, "learning_rate": 2.1824031634601896e-08, "loss": 0.7584, "step": 27494 }, { "epoch": 0.9711729929488849, "grad_norm": 1.8171014785766602, "learning_rate": 2.177067830817947e-08, "loss": 0.7808, "step": 27495 }, { "epoch": 0.9712083147525928, "grad_norm": 1.6151803731918335, "learning_rate": 2.171739013588692e-08, "loss": 0.7985, "step": 27496 }, { "epoch": 0.9712436365563007, "grad_norm": 2.0954649448394775, "learning_rate": 2.1664167118421454e-08, "loss": 0.768, "step": 27497 }, { "epoch": 0.9712789583600087, "grad_norm": 1.5420972108840942, "learning_rate": 2.1611009256479188e-08, "loss": 0.7339, "step": 27498 }, { "epoch": 0.9713142801637166, "grad_norm": 1.6615352630615234, "learning_rate": 2.155791655075623e-08, "loss": 0.7703, "step": 27499 }, { "epoch": 0.9713496019674245, "grad_norm": 0.946402370929718, "learning_rate": 2.150488900194758e-08, "loss": 0.5753, "step": 27500 }, { "epoch": 0.9713849237711324, "grad_norm": 1.6605561971664429, "learning_rate": 2.145192661074713e-08, "loss": 0.7566, "step": 27501 }, { "epoch": 0.9714202455748403, "grad_norm": 1.9880499839782715, "learning_rate": 2.1399029377848214e-08, "loss": 0.7513, "step": 27502 }, { "epoch": 0.9714555673785482, "grad_norm": 1.5412929058074951, "learning_rate": 2.1346197303942495e-08, "loss": 0.7545, "step": 27503 }, { "epoch": 0.9714908891822561, "grad_norm": 1.892514944076538, "learning_rate": 2.1293430389722204e-08, "loss": 0.7907, "step": 27504 }, { "epoch": 0.971526210985964, "grad_norm": 1.6971020698547363, "learning_rate": 2.124072863587734e-08, "loss": 0.7372, "step": 27505 }, { "epoch": 0.9715615327896719, "grad_norm": 1.8533788919448853, "learning_rate": 2.1188092043097907e-08, "loss": 0.7618, "step": 27506 }, { "epoch": 0.9715968545933799, "grad_norm": 1.6602367162704468, "learning_rate": 2.1135520612073357e-08, "loss": 0.759, "step": 27507 }, { "epoch": 0.9716321763970878, "grad_norm": 1.897611141204834, "learning_rate": 2.108301434349036e-08, "loss": 0.8051, "step": 27508 }, { "epoch": 0.9716674982007957, "grad_norm": 1.756184458732605, "learning_rate": 2.103057323803781e-08, "loss": 0.7793, "step": 27509 }, { "epoch": 0.9717028200045035, "grad_norm": 1.6912380456924438, "learning_rate": 2.0978197296400715e-08, "loss": 0.7787, "step": 27510 }, { "epoch": 0.9717381418082114, "grad_norm": 1.6602375507354736, "learning_rate": 2.0925886519264636e-08, "loss": 0.7617, "step": 27511 }, { "epoch": 0.9717734636119193, "grad_norm": 2.1916091442108154, "learning_rate": 2.087364090731514e-08, "loss": 0.7283, "step": 27512 }, { "epoch": 0.9718087854156272, "grad_norm": 1.7932546138763428, "learning_rate": 2.0821460461235565e-08, "loss": 0.7468, "step": 27513 }, { "epoch": 0.9718441072193351, "grad_norm": 1.6363762617111206, "learning_rate": 2.0769345181708145e-08, "loss": 0.7295, "step": 27514 }, { "epoch": 0.971879429023043, "grad_norm": 1.7885044813156128, "learning_rate": 2.0717295069415664e-08, "loss": 0.7841, "step": 27515 }, { "epoch": 0.9719147508267509, "grad_norm": 1.5754400491714478, "learning_rate": 2.0665310125039807e-08, "loss": 0.7899, "step": 27516 }, { "epoch": 0.9719500726304589, "grad_norm": 0.9892014265060425, "learning_rate": 2.0613390349259467e-08, "loss": 0.5816, "step": 27517 }, { "epoch": 0.9719853944341668, "grad_norm": 1.7214411497116089, "learning_rate": 2.0561535742755766e-08, "loss": 0.7609, "step": 27518 }, { "epoch": 0.9720207162378747, "grad_norm": 1.5046440362930298, "learning_rate": 2.05097463062065e-08, "loss": 0.7441, "step": 27519 }, { "epoch": 0.9720560380415826, "grad_norm": 0.9061424136161804, "learning_rate": 2.045802204028946e-08, "loss": 0.5455, "step": 27520 }, { "epoch": 0.9720913598452905, "grad_norm": 1.6240488290786743, "learning_rate": 2.040636294568188e-08, "loss": 0.7359, "step": 27521 }, { "epoch": 0.9721266816489984, "grad_norm": 1.8200503587722778, "learning_rate": 2.0354769023059885e-08, "loss": 0.7251, "step": 27522 }, { "epoch": 0.9721620034527063, "grad_norm": 1.8484309911727905, "learning_rate": 2.030324027309849e-08, "loss": 0.764, "step": 27523 }, { "epoch": 0.9721973252564142, "grad_norm": 1.651379942893982, "learning_rate": 2.025177669647216e-08, "loss": 0.7626, "step": 27524 }, { "epoch": 0.9722326470601221, "grad_norm": 1.5603684186935425, "learning_rate": 2.020037829385424e-08, "loss": 0.7555, "step": 27525 }, { "epoch": 0.97226796886383, "grad_norm": 1.6745789051055908, "learning_rate": 2.014904506591808e-08, "loss": 0.755, "step": 27526 }, { "epoch": 0.972303290667538, "grad_norm": 1.6370359659194946, "learning_rate": 2.0097777013335373e-08, "loss": 0.7757, "step": 27527 }, { "epoch": 0.9723386124712459, "grad_norm": 1.9030030965805054, "learning_rate": 2.004657413677613e-08, "loss": 0.7459, "step": 27528 }, { "epoch": 0.9723739342749538, "grad_norm": 1.7046513557434082, "learning_rate": 1.9995436436911487e-08, "loss": 0.7733, "step": 27529 }, { "epoch": 0.9724092560786617, "grad_norm": 1.8144384622573853, "learning_rate": 1.9944363914410348e-08, "loss": 0.7414, "step": 27530 }, { "epoch": 0.9724445778823696, "grad_norm": 4.049593925476074, "learning_rate": 1.9893356569941626e-08, "loss": 0.7595, "step": 27531 }, { "epoch": 0.9724798996860775, "grad_norm": 1.6925190687179565, "learning_rate": 1.9842414404172004e-08, "loss": 0.7808, "step": 27532 }, { "epoch": 0.9725152214897854, "grad_norm": 1.6267640590667725, "learning_rate": 1.9791537417768737e-08, "loss": 0.7668, "step": 27533 }, { "epoch": 0.9725505432934933, "grad_norm": 1.7054857015609741, "learning_rate": 1.974072561139795e-08, "loss": 0.7748, "step": 27534 }, { "epoch": 0.9725858650972012, "grad_norm": 1.7715213298797607, "learning_rate": 1.9689978985724113e-08, "loss": 0.7967, "step": 27535 }, { "epoch": 0.972621186900909, "grad_norm": 1.8977935314178467, "learning_rate": 1.9639297541411694e-08, "loss": 0.7628, "step": 27536 }, { "epoch": 0.972656508704617, "grad_norm": 1.8466490507125854, "learning_rate": 1.9588681279124056e-08, "loss": 0.7439, "step": 27537 }, { "epoch": 0.9726918305083249, "grad_norm": 1.667975664138794, "learning_rate": 1.9538130199523443e-08, "loss": 0.7496, "step": 27538 }, { "epoch": 0.9727271523120328, "grad_norm": 1.8931858539581299, "learning_rate": 1.9487644303271545e-08, "loss": 0.7755, "step": 27539 }, { "epoch": 0.9727624741157407, "grad_norm": 1.7498300075531006, "learning_rate": 1.9437223591029507e-08, "loss": 0.7716, "step": 27540 }, { "epoch": 0.9727977959194486, "grad_norm": 1.7948331832885742, "learning_rate": 1.9386868063456242e-08, "loss": 0.7132, "step": 27541 }, { "epoch": 0.9728331177231565, "grad_norm": 0.9179585576057434, "learning_rate": 1.933657772121178e-08, "loss": 0.5951, "step": 27542 }, { "epoch": 0.9728684395268644, "grad_norm": 0.9648535847663879, "learning_rate": 1.9286352564953924e-08, "loss": 0.5517, "step": 27543 }, { "epoch": 0.9729037613305723, "grad_norm": 1.5780754089355469, "learning_rate": 1.9236192595339376e-08, "loss": 0.7581, "step": 27544 }, { "epoch": 0.9729390831342802, "grad_norm": 1.620245099067688, "learning_rate": 1.9186097813026495e-08, "loss": 0.7632, "step": 27545 }, { "epoch": 0.9729744049379881, "grad_norm": 1.6169805526733398, "learning_rate": 1.9136068218668648e-08, "loss": 0.7546, "step": 27546 }, { "epoch": 0.9730097267416961, "grad_norm": 1.8566441535949707, "learning_rate": 1.9086103812921974e-08, "loss": 0.7367, "step": 27547 }, { "epoch": 0.973045048545404, "grad_norm": 1.874578833580017, "learning_rate": 1.9036204596439846e-08, "loss": 0.7512, "step": 27548 }, { "epoch": 0.9730803703491119, "grad_norm": 1.9169210195541382, "learning_rate": 1.8986370569876178e-08, "loss": 0.7853, "step": 27549 }, { "epoch": 0.9731156921528198, "grad_norm": 2.309764862060547, "learning_rate": 1.8936601733881566e-08, "loss": 0.7563, "step": 27550 }, { "epoch": 0.9731510139565277, "grad_norm": 1.5958882570266724, "learning_rate": 1.8886898089109374e-08, "loss": 0.7354, "step": 27551 }, { "epoch": 0.9731863357602356, "grad_norm": 1.97139573097229, "learning_rate": 1.8837259636208526e-08, "loss": 0.756, "step": 27552 }, { "epoch": 0.9732216575639435, "grad_norm": 1.6941888332366943, "learning_rate": 1.8787686375829616e-08, "loss": 0.7707, "step": 27553 }, { "epoch": 0.9732569793676514, "grad_norm": 2.543278932571411, "learning_rate": 1.8738178308620457e-08, "loss": 0.7962, "step": 27554 }, { "epoch": 0.9732923011713593, "grad_norm": 1.6172709465026855, "learning_rate": 1.8688735435230533e-08, "loss": 0.7834, "step": 27555 }, { "epoch": 0.9733276229750673, "grad_norm": 4.491234302520752, "learning_rate": 1.8639357756305432e-08, "loss": 0.7384, "step": 27556 }, { "epoch": 0.9733629447787752, "grad_norm": 1.5951381921768188, "learning_rate": 1.8590045272491863e-08, "loss": 0.7684, "step": 27557 }, { "epoch": 0.9733982665824831, "grad_norm": 1.7671946287155151, "learning_rate": 1.8540797984435977e-08, "loss": 0.7507, "step": 27558 }, { "epoch": 0.973433588386191, "grad_norm": 1.712623953819275, "learning_rate": 1.84916158927817e-08, "loss": 0.778, "step": 27559 }, { "epoch": 0.9734689101898989, "grad_norm": 1.6489696502685547, "learning_rate": 1.8442498998172408e-08, "loss": 0.7515, "step": 27560 }, { "epoch": 0.9735042319936068, "grad_norm": 1.8571840524673462, "learning_rate": 1.8393447301251478e-08, "loss": 0.7692, "step": 27561 }, { "epoch": 0.9735395537973146, "grad_norm": 1.797473430633545, "learning_rate": 1.8344460802660057e-08, "loss": 0.7592, "step": 27562 }, { "epoch": 0.9735748756010225, "grad_norm": 1.8449703454971313, "learning_rate": 1.8295539503040416e-08, "loss": 0.7834, "step": 27563 }, { "epoch": 0.9736101974047304, "grad_norm": 1.795307993888855, "learning_rate": 1.8246683403032596e-08, "loss": 0.7412, "step": 27564 }, { "epoch": 0.9736455192084383, "grad_norm": 1.9192087650299072, "learning_rate": 1.819789250327553e-08, "loss": 0.7717, "step": 27565 }, { "epoch": 0.9736808410121462, "grad_norm": 1.8213958740234375, "learning_rate": 1.8149166804407592e-08, "loss": 0.7666, "step": 27566 }, { "epoch": 0.9737161628158542, "grad_norm": 1.6716346740722656, "learning_rate": 1.8100506307067167e-08, "loss": 0.7671, "step": 27567 }, { "epoch": 0.9737514846195621, "grad_norm": 1.8157663345336914, "learning_rate": 1.805191101189041e-08, "loss": 0.7753, "step": 27568 }, { "epoch": 0.97378680642327, "grad_norm": 1.5987948179244995, "learning_rate": 1.8003380919514034e-08, "loss": 0.7349, "step": 27569 }, { "epoch": 0.9738221282269779, "grad_norm": 1.5280014276504517, "learning_rate": 1.795491603057309e-08, "loss": 0.7609, "step": 27570 }, { "epoch": 0.9738574500306858, "grad_norm": 1.7667837142944336, "learning_rate": 1.7906516345701506e-08, "loss": 0.7534, "step": 27571 }, { "epoch": 0.9738927718343937, "grad_norm": 1.6697863340377808, "learning_rate": 1.785818186553323e-08, "loss": 0.736, "step": 27572 }, { "epoch": 0.9739280936381016, "grad_norm": 1.861255168914795, "learning_rate": 1.7809912590699973e-08, "loss": 0.7661, "step": 27573 }, { "epoch": 0.9739634154418095, "grad_norm": 1.6302313804626465, "learning_rate": 1.7761708521834566e-08, "loss": 0.75, "step": 27574 }, { "epoch": 0.9739987372455174, "grad_norm": 1.661494493484497, "learning_rate": 1.7713569659566498e-08, "loss": 0.7913, "step": 27575 }, { "epoch": 0.9740340590492254, "grad_norm": 1.7085367441177368, "learning_rate": 1.7665496004527493e-08, "loss": 0.7659, "step": 27576 }, { "epoch": 0.9740693808529333, "grad_norm": 2.201775550842285, "learning_rate": 1.7617487557345937e-08, "loss": 0.7804, "step": 27577 }, { "epoch": 0.9741047026566412, "grad_norm": 1.7580482959747314, "learning_rate": 1.7569544318649657e-08, "loss": 0.7609, "step": 27578 }, { "epoch": 0.9741400244603491, "grad_norm": 1.749182939529419, "learning_rate": 1.7521666289066486e-08, "loss": 0.763, "step": 27579 }, { "epoch": 0.974175346264057, "grad_norm": 1.7693073749542236, "learning_rate": 1.74738534692237e-08, "loss": 0.7718, "step": 27580 }, { "epoch": 0.9742106680677649, "grad_norm": 1.6423068046569824, "learning_rate": 1.74261058597458e-08, "loss": 0.7656, "step": 27581 }, { "epoch": 0.9742459898714728, "grad_norm": 1.6796647310256958, "learning_rate": 1.7378423461258953e-08, "loss": 0.7663, "step": 27582 }, { "epoch": 0.9742813116751807, "grad_norm": 1.6782013177871704, "learning_rate": 1.733080627438655e-08, "loss": 0.7672, "step": 27583 }, { "epoch": 0.9743166334788886, "grad_norm": 1.6931159496307373, "learning_rate": 1.728325429975197e-08, "loss": 0.7424, "step": 27584 }, { "epoch": 0.9743519552825965, "grad_norm": 1.7128711938858032, "learning_rate": 1.723576753797751e-08, "loss": 0.7804, "step": 27585 }, { "epoch": 0.9743872770863045, "grad_norm": 1.6940293312072754, "learning_rate": 1.7188345989684885e-08, "loss": 0.7604, "step": 27586 }, { "epoch": 0.9744225988900124, "grad_norm": 1.6232026815414429, "learning_rate": 1.7140989655494155e-08, "loss": 0.7406, "step": 27587 }, { "epoch": 0.9744579206937202, "grad_norm": 1.6766186952590942, "learning_rate": 1.7093698536025382e-08, "loss": 0.7759, "step": 27588 }, { "epoch": 0.9744932424974281, "grad_norm": 1.5962978601455688, "learning_rate": 1.7046472631897515e-08, "loss": 0.7027, "step": 27589 }, { "epoch": 0.974528564301136, "grad_norm": 1.6925045251846313, "learning_rate": 1.6999311943729503e-08, "loss": 0.7552, "step": 27590 }, { "epoch": 0.9745638861048439, "grad_norm": 1.742611289024353, "learning_rate": 1.6952216472136963e-08, "loss": 0.7443, "step": 27591 }, { "epoch": 0.9745992079085518, "grad_norm": 1.622119426727295, "learning_rate": 1.690518621773718e-08, "loss": 0.7781, "step": 27592 }, { "epoch": 0.9746345297122597, "grad_norm": 4.316903114318848, "learning_rate": 1.685822118114633e-08, "loss": 0.7468, "step": 27593 }, { "epoch": 0.9746698515159676, "grad_norm": 1.665909767150879, "learning_rate": 1.6811321362977805e-08, "loss": 0.7644, "step": 27594 }, { "epoch": 0.9747051733196755, "grad_norm": 2.068549156188965, "learning_rate": 1.6764486763845567e-08, "loss": 0.7357, "step": 27595 }, { "epoch": 0.9747404951233835, "grad_norm": 1.5367215871810913, "learning_rate": 1.6717717384363564e-08, "loss": 0.7749, "step": 27596 }, { "epoch": 0.9747758169270914, "grad_norm": 1.6197713613510132, "learning_rate": 1.667101322514353e-08, "loss": 0.7628, "step": 27597 }, { "epoch": 0.9748111387307993, "grad_norm": 1.5944150686264038, "learning_rate": 1.6624374286796086e-08, "loss": 0.7398, "step": 27598 }, { "epoch": 0.9748464605345072, "grad_norm": 2.020592212677002, "learning_rate": 1.6577800569931856e-08, "loss": 0.7796, "step": 27599 }, { "epoch": 0.9748817823382151, "grad_norm": 1.6326473951339722, "learning_rate": 1.653129207516091e-08, "loss": 0.75, "step": 27600 }, { "epoch": 0.974917104141923, "grad_norm": 1.9429823160171509, "learning_rate": 1.6484848803091647e-08, "loss": 0.7132, "step": 27601 }, { "epoch": 0.9749524259456309, "grad_norm": 1.7773367166519165, "learning_rate": 1.6438470754331913e-08, "loss": 0.7455, "step": 27602 }, { "epoch": 0.9749877477493388, "grad_norm": 1.966664433479309, "learning_rate": 1.6392157929489007e-08, "loss": 0.757, "step": 27603 }, { "epoch": 0.9750230695530467, "grad_norm": 1.7519521713256836, "learning_rate": 1.6345910329167992e-08, "loss": 0.7645, "step": 27604 }, { "epoch": 0.9750583913567547, "grad_norm": 1.8052898645401, "learning_rate": 1.6299727953975052e-08, "loss": 0.7924, "step": 27605 }, { "epoch": 0.9750937131604626, "grad_norm": 2.1956396102905273, "learning_rate": 1.6253610804514708e-08, "loss": 0.7782, "step": 27606 }, { "epoch": 0.9751290349641705, "grad_norm": 1.7957024574279785, "learning_rate": 1.620755888139036e-08, "loss": 0.758, "step": 27607 }, { "epoch": 0.9751643567678784, "grad_norm": 1.6543408632278442, "learning_rate": 1.6161572185204312e-08, "loss": 0.7291, "step": 27608 }, { "epoch": 0.9751996785715863, "grad_norm": 1.6838983297348022, "learning_rate": 1.6115650716558852e-08, "loss": 0.7384, "step": 27609 }, { "epoch": 0.9752350003752942, "grad_norm": 1.6674418449401855, "learning_rate": 1.6069794476054613e-08, "loss": 0.7368, "step": 27610 }, { "epoch": 0.9752703221790021, "grad_norm": 1.7422775030136108, "learning_rate": 1.6024003464292226e-08, "loss": 0.736, "step": 27611 }, { "epoch": 0.97530564398271, "grad_norm": 1.7444839477539062, "learning_rate": 1.5978277681870656e-08, "loss": 0.7769, "step": 27612 }, { "epoch": 0.9753409657864179, "grad_norm": 1.5128037929534912, "learning_rate": 1.5932617129388873e-08, "loss": 0.7689, "step": 27613 }, { "epoch": 0.9753762875901257, "grad_norm": 2.1177926063537598, "learning_rate": 1.588702180744417e-08, "loss": 0.7778, "step": 27614 }, { "epoch": 0.9754116093938336, "grad_norm": 1.7568508386611938, "learning_rate": 1.5841491716632738e-08, "loss": 0.7766, "step": 27615 }, { "epoch": 0.9754469311975416, "grad_norm": 2.2115726470947266, "learning_rate": 1.5796026857551326e-08, "loss": 0.7558, "step": 27616 }, { "epoch": 0.9754822530012495, "grad_norm": 0.9728972911834717, "learning_rate": 1.575062723079446e-08, "loss": 0.5667, "step": 27617 }, { "epoch": 0.9755175748049574, "grad_norm": 1.569329857826233, "learning_rate": 1.57052928369561e-08, "loss": 0.7538, "step": 27618 }, { "epoch": 0.9755528966086653, "grad_norm": 1.8378421068191528, "learning_rate": 1.5660023676630776e-08, "loss": 0.7468, "step": 27619 }, { "epoch": 0.9755882184123732, "grad_norm": 1.8020315170288086, "learning_rate": 1.5614819750409682e-08, "loss": 0.762, "step": 27620 }, { "epoch": 0.9756235402160811, "grad_norm": 1.799104928970337, "learning_rate": 1.5569681058884566e-08, "loss": 0.7682, "step": 27621 }, { "epoch": 0.975658862019789, "grad_norm": 1.7611305713653564, "learning_rate": 1.5524607602646623e-08, "loss": 0.7445, "step": 27622 }, { "epoch": 0.9756941838234969, "grad_norm": 3.6709952354431152, "learning_rate": 1.5479599382285935e-08, "loss": 0.7365, "step": 27623 }, { "epoch": 0.9757295056272048, "grad_norm": 1.5366361141204834, "learning_rate": 1.5434656398390924e-08, "loss": 0.7907, "step": 27624 }, { "epoch": 0.9757648274309128, "grad_norm": 1.6825125217437744, "learning_rate": 1.538977865155e-08, "loss": 0.7774, "step": 27625 }, { "epoch": 0.9758001492346207, "grad_norm": 1.8376044034957886, "learning_rate": 1.5344966142351035e-08, "loss": 0.7959, "step": 27626 }, { "epoch": 0.9758354710383286, "grad_norm": 1.8501980304718018, "learning_rate": 1.530021887137967e-08, "loss": 0.7438, "step": 27627 }, { "epoch": 0.9758707928420365, "grad_norm": 1.6786307096481323, "learning_rate": 1.525553683922265e-08, "loss": 0.7725, "step": 27628 }, { "epoch": 0.9759061146457444, "grad_norm": 2.0788118839263916, "learning_rate": 1.5210920046463963e-08, "loss": 0.7401, "step": 27629 }, { "epoch": 0.9759414364494523, "grad_norm": 1.9027082920074463, "learning_rate": 1.5166368493687578e-08, "loss": 0.7684, "step": 27630 }, { "epoch": 0.9759767582531602, "grad_norm": 1.574933648109436, "learning_rate": 1.5121882181476366e-08, "loss": 0.7403, "step": 27631 }, { "epoch": 0.9760120800568681, "grad_norm": 1.8514173030853271, "learning_rate": 1.5077461110413194e-08, "loss": 0.7452, "step": 27632 }, { "epoch": 0.976047401860576, "grad_norm": 1.6131939888000488, "learning_rate": 1.5033105281079264e-08, "loss": 0.7697, "step": 27633 }, { "epoch": 0.976082723664284, "grad_norm": 1.7200864553451538, "learning_rate": 1.498881469405522e-08, "loss": 0.803, "step": 27634 }, { "epoch": 0.9761180454679919, "grad_norm": 1.6769304275512695, "learning_rate": 1.4944589349920048e-08, "loss": 0.7578, "step": 27635 }, { "epoch": 0.9761533672716998, "grad_norm": 1.6187540292739868, "learning_rate": 1.4900429249253279e-08, "loss": 0.7428, "step": 27636 }, { "epoch": 0.9761886890754077, "grad_norm": 1.568825125694275, "learning_rate": 1.4856334392632233e-08, "loss": 0.7755, "step": 27637 }, { "epoch": 0.9762240108791156, "grad_norm": 1.7522635459899902, "learning_rate": 1.4812304780634779e-08, "loss": 0.7328, "step": 27638 }, { "epoch": 0.9762593326828235, "grad_norm": 1.7838796377182007, "learning_rate": 1.4768340413836569e-08, "loss": 0.7664, "step": 27639 }, { "epoch": 0.9762946544865313, "grad_norm": 1.6725256443023682, "learning_rate": 1.4724441292813251e-08, "loss": 0.8066, "step": 27640 }, { "epoch": 0.9763299762902392, "grad_norm": 1.7496566772460938, "learning_rate": 1.468060741813937e-08, "loss": 0.7585, "step": 27641 }, { "epoch": 0.9763652980939471, "grad_norm": 1.5686410665512085, "learning_rate": 1.4636838790388352e-08, "loss": 0.7843, "step": 27642 }, { "epoch": 0.976400619897655, "grad_norm": 1.6897178888320923, "learning_rate": 1.459313541013363e-08, "loss": 0.7521, "step": 27643 }, { "epoch": 0.9764359417013629, "grad_norm": 1.6958682537078857, "learning_rate": 1.4549497277946412e-08, "loss": 0.7664, "step": 27644 }, { "epoch": 0.9764712635050709, "grad_norm": 1.630582332611084, "learning_rate": 1.4505924394398464e-08, "loss": 0.7694, "step": 27645 }, { "epoch": 0.9765065853087788, "grad_norm": 1.7434332370758057, "learning_rate": 1.4462416760059327e-08, "loss": 0.746, "step": 27646 }, { "epoch": 0.9765419071124867, "grad_norm": 1.651474118232727, "learning_rate": 1.4418974375499106e-08, "loss": 0.7747, "step": 27647 }, { "epoch": 0.9765772289161946, "grad_norm": 1.6095460653305054, "learning_rate": 1.4375597241286788e-08, "loss": 0.7418, "step": 27648 }, { "epoch": 0.9766125507199025, "grad_norm": 2.5335609912872314, "learning_rate": 1.4332285357988584e-08, "loss": 0.7432, "step": 27649 }, { "epoch": 0.9766478725236104, "grad_norm": 1.8182494640350342, "learning_rate": 1.4289038726172933e-08, "loss": 0.8136, "step": 27650 }, { "epoch": 0.9766831943273183, "grad_norm": 1.7012474536895752, "learning_rate": 1.4245857346404935e-08, "loss": 0.7289, "step": 27651 }, { "epoch": 0.9767185161310262, "grad_norm": 1.6593856811523438, "learning_rate": 1.4202741219249694e-08, "loss": 0.777, "step": 27652 }, { "epoch": 0.9767538379347341, "grad_norm": 1.6800791025161743, "learning_rate": 1.415969034527176e-08, "loss": 0.7711, "step": 27653 }, { "epoch": 0.976789159738442, "grad_norm": 1.6930220127105713, "learning_rate": 1.4116704725034568e-08, "loss": 0.7678, "step": 27654 }, { "epoch": 0.97682448154215, "grad_norm": 1.6439825296401978, "learning_rate": 1.4073784359101006e-08, "loss": 0.7386, "step": 27655 }, { "epoch": 0.9768598033458579, "grad_norm": 3.01332688331604, "learning_rate": 1.4030929248032288e-08, "loss": 0.7884, "step": 27656 }, { "epoch": 0.9768951251495658, "grad_norm": 1.833727240562439, "learning_rate": 1.3988139392389633e-08, "loss": 0.756, "step": 27657 }, { "epoch": 0.9769304469532737, "grad_norm": 1.8501545190811157, "learning_rate": 1.3945414792733147e-08, "loss": 0.7879, "step": 27658 }, { "epoch": 0.9769657687569816, "grad_norm": 1.8376764059066772, "learning_rate": 1.3902755449621275e-08, "loss": 0.7553, "step": 27659 }, { "epoch": 0.9770010905606895, "grad_norm": 1.6305005550384521, "learning_rate": 1.3860161363613011e-08, "loss": 0.7506, "step": 27660 }, { "epoch": 0.9770364123643974, "grad_norm": 2.904843330383301, "learning_rate": 1.3817632535265691e-08, "loss": 0.7815, "step": 27661 }, { "epoch": 0.9770717341681053, "grad_norm": 0.9611453413963318, "learning_rate": 1.3775168965136088e-08, "loss": 0.5754, "step": 27662 }, { "epoch": 0.9771070559718132, "grad_norm": 0.8730177283287048, "learning_rate": 1.3732770653779316e-08, "loss": 0.5701, "step": 27663 }, { "epoch": 0.9771423777755212, "grad_norm": 1.6814255714416504, "learning_rate": 1.3690437601751038e-08, "loss": 0.7364, "step": 27664 }, { "epoch": 0.9771776995792291, "grad_norm": 1.6872897148132324, "learning_rate": 1.3648169809605261e-08, "loss": 0.8056, "step": 27665 }, { "epoch": 0.977213021382937, "grad_norm": 1.5905301570892334, "learning_rate": 1.3605967277894317e-08, "loss": 0.7685, "step": 27666 }, { "epoch": 0.9772483431866448, "grad_norm": 1.5829147100448608, "learning_rate": 1.3563830007171653e-08, "loss": 0.7474, "step": 27667 }, { "epoch": 0.9772836649903527, "grad_norm": 1.5850021839141846, "learning_rate": 1.3521757997987939e-08, "loss": 0.7079, "step": 27668 }, { "epoch": 0.9773189867940606, "grad_norm": 1.7815179824829102, "learning_rate": 1.3479751250893846e-08, "loss": 0.7786, "step": 27669 }, { "epoch": 0.9773543085977685, "grad_norm": 1.6464208364486694, "learning_rate": 1.3437809766440047e-08, "loss": 0.7994, "step": 27670 }, { "epoch": 0.9773896304014764, "grad_norm": 1.854194164276123, "learning_rate": 1.3395933545174433e-08, "loss": 0.7868, "step": 27671 }, { "epoch": 0.9774249522051843, "grad_norm": 2.3223304748535156, "learning_rate": 1.3354122587645456e-08, "loss": 0.7438, "step": 27672 }, { "epoch": 0.9774602740088922, "grad_norm": 1.7773816585540771, "learning_rate": 1.3312376894400458e-08, "loss": 0.7923, "step": 27673 }, { "epoch": 0.9774955958126001, "grad_norm": 1.7087525129318237, "learning_rate": 1.3270696465985666e-08, "loss": 0.7643, "step": 27674 }, { "epoch": 0.9775309176163081, "grad_norm": 1.6885031461715698, "learning_rate": 1.3229081302946756e-08, "loss": 0.7832, "step": 27675 }, { "epoch": 0.977566239420016, "grad_norm": 1.9055209159851074, "learning_rate": 1.3187531405827735e-08, "loss": 0.7766, "step": 27676 }, { "epoch": 0.9776015612237239, "grad_norm": 1.758569598197937, "learning_rate": 1.314604677517317e-08, "loss": 0.749, "step": 27677 }, { "epoch": 0.9776368830274318, "grad_norm": 1.6091487407684326, "learning_rate": 1.310462741152596e-08, "loss": 0.7665, "step": 27678 }, { "epoch": 0.9776722048311397, "grad_norm": 1.6919580698013306, "learning_rate": 1.3063273315427893e-08, "loss": 0.7809, "step": 27679 }, { "epoch": 0.9777075266348476, "grad_norm": 1.716820240020752, "learning_rate": 1.3021984487419647e-08, "loss": 0.7441, "step": 27680 }, { "epoch": 0.9777428484385555, "grad_norm": 1.6865829229354858, "learning_rate": 1.2980760928043013e-08, "loss": 0.7558, "step": 27681 }, { "epoch": 0.9777781702422634, "grad_norm": 2.2602083683013916, "learning_rate": 1.2939602637836447e-08, "loss": 0.791, "step": 27682 }, { "epoch": 0.9778134920459713, "grad_norm": 1.639805793762207, "learning_rate": 1.2898509617338407e-08, "loss": 0.7646, "step": 27683 }, { "epoch": 0.9778488138496793, "grad_norm": 1.7472864389419556, "learning_rate": 1.2857481867087907e-08, "loss": 0.7559, "step": 27684 }, { "epoch": 0.9778841356533872, "grad_norm": 2.782062292098999, "learning_rate": 1.2816519387621185e-08, "loss": 0.7999, "step": 27685 }, { "epoch": 0.9779194574570951, "grad_norm": 2.493900775909424, "learning_rate": 1.2775622179473923e-08, "loss": 0.7484, "step": 27686 }, { "epoch": 0.977954779260803, "grad_norm": 0.9228092432022095, "learning_rate": 1.2734790243181805e-08, "loss": 0.5629, "step": 27687 }, { "epoch": 0.9779901010645109, "grad_norm": 1.8926682472229004, "learning_rate": 1.2694023579279402e-08, "loss": 0.77, "step": 27688 }, { "epoch": 0.9780254228682188, "grad_norm": 1.8402520418167114, "learning_rate": 1.2653322188300176e-08, "loss": 0.7604, "step": 27689 }, { "epoch": 0.9780607446719267, "grad_norm": 1.6302030086517334, "learning_rate": 1.2612686070777037e-08, "loss": 0.7745, "step": 27690 }, { "epoch": 0.9780960664756346, "grad_norm": 1.5729591846466064, "learning_rate": 1.2572115227241222e-08, "loss": 0.7387, "step": 27691 }, { "epoch": 0.9781313882793425, "grad_norm": 1.6040314435958862, "learning_rate": 1.2531609658223975e-08, "loss": 0.7303, "step": 27692 }, { "epoch": 0.9781667100830503, "grad_norm": 1.674362301826477, "learning_rate": 1.2491169364255428e-08, "loss": 0.7786, "step": 27693 }, { "epoch": 0.9782020318867582, "grad_norm": 2.2517287731170654, "learning_rate": 1.2450794345865157e-08, "loss": 0.7451, "step": 27694 }, { "epoch": 0.9782373536904662, "grad_norm": 1.8858871459960938, "learning_rate": 1.2410484603581074e-08, "loss": 0.7852, "step": 27695 }, { "epoch": 0.9782726754941741, "grad_norm": 1.5877244472503662, "learning_rate": 1.2370240137931088e-08, "loss": 0.7517, "step": 27696 }, { "epoch": 0.978307997297882, "grad_norm": 1.56863534450531, "learning_rate": 1.2330060949442003e-08, "loss": 0.7302, "step": 27697 }, { "epoch": 0.9783433191015899, "grad_norm": 2.579782247543335, "learning_rate": 1.2289947038639505e-08, "loss": 0.7588, "step": 27698 }, { "epoch": 0.9783786409052978, "grad_norm": 2.1328346729278564, "learning_rate": 1.2249898406048177e-08, "loss": 0.773, "step": 27699 }, { "epoch": 0.9784139627090057, "grad_norm": 1.756861686706543, "learning_rate": 1.2209915052193155e-08, "loss": 0.7983, "step": 27700 }, { "epoch": 0.9784492845127136, "grad_norm": 1.5400980710983276, "learning_rate": 1.2169996977596799e-08, "loss": 0.7582, "step": 27701 }, { "epoch": 0.9784846063164215, "grad_norm": 1.573682188987732, "learning_rate": 1.2130144182781468e-08, "loss": 0.7078, "step": 27702 }, { "epoch": 0.9785199281201294, "grad_norm": 1.5944066047668457, "learning_rate": 1.2090356668270075e-08, "loss": 0.7659, "step": 27703 }, { "epoch": 0.9785552499238374, "grad_norm": 3.9032535552978516, "learning_rate": 1.2050634434582209e-08, "loss": 0.7848, "step": 27704 }, { "epoch": 0.9785905717275453, "grad_norm": 1.842734694480896, "learning_rate": 1.2010977482237451e-08, "loss": 0.7707, "step": 27705 }, { "epoch": 0.9786258935312532, "grad_norm": 1.7321890592575073, "learning_rate": 1.1971385811755943e-08, "loss": 0.8077, "step": 27706 }, { "epoch": 0.9786612153349611, "grad_norm": 1.6988012790679932, "learning_rate": 1.1931859423655045e-08, "loss": 0.7446, "step": 27707 }, { "epoch": 0.978696537138669, "grad_norm": 1.875632643699646, "learning_rate": 1.189239831845268e-08, "loss": 0.7204, "step": 27708 }, { "epoch": 0.9787318589423769, "grad_norm": 1.6479535102844238, "learning_rate": 1.1853002496664546e-08, "loss": 0.7218, "step": 27709 }, { "epoch": 0.9787671807460848, "grad_norm": 1.9312220811843872, "learning_rate": 1.1813671958806894e-08, "loss": 0.7404, "step": 27710 }, { "epoch": 0.9788025025497927, "grad_norm": 1.807340145111084, "learning_rate": 1.1774406705394315e-08, "loss": 0.7265, "step": 27711 }, { "epoch": 0.9788378243535006, "grad_norm": 1.7159429788589478, "learning_rate": 1.1735206736940285e-08, "loss": 0.7452, "step": 27712 }, { "epoch": 0.9788731461572086, "grad_norm": 1.84589684009552, "learning_rate": 1.1696072053958286e-08, "loss": 0.7888, "step": 27713 }, { "epoch": 0.9789084679609165, "grad_norm": 1.7385468482971191, "learning_rate": 1.1657002656960682e-08, "loss": 0.7817, "step": 27714 }, { "epoch": 0.9789437897646244, "grad_norm": 1.8208098411560059, "learning_rate": 1.1617998546458175e-08, "loss": 0.8046, "step": 27715 }, { "epoch": 0.9789791115683323, "grad_norm": 1.6745567321777344, "learning_rate": 1.1579059722962027e-08, "loss": 0.7907, "step": 27716 }, { "epoch": 0.9790144333720402, "grad_norm": 1.836443305015564, "learning_rate": 1.1540186186980717e-08, "loss": 0.7637, "step": 27717 }, { "epoch": 0.9790497551757481, "grad_norm": 1.7205638885498047, "learning_rate": 1.1501377939024394e-08, "loss": 0.7677, "step": 27718 }, { "epoch": 0.9790850769794559, "grad_norm": 1.7851121425628662, "learning_rate": 1.1462634979599874e-08, "loss": 0.7658, "step": 27719 }, { "epoch": 0.9791203987831638, "grad_norm": 1.625553011894226, "learning_rate": 1.1423957309214529e-08, "loss": 0.7548, "step": 27720 }, { "epoch": 0.9791557205868717, "grad_norm": 1.7821738719940186, "learning_rate": 1.1385344928375175e-08, "loss": 0.7343, "step": 27721 }, { "epoch": 0.9791910423905796, "grad_norm": 1.8697630167007446, "learning_rate": 1.134679783758641e-08, "loss": 0.7829, "step": 27722 }, { "epoch": 0.9792263641942875, "grad_norm": 1.5459502935409546, "learning_rate": 1.1308316037352829e-08, "loss": 0.7333, "step": 27723 }, { "epoch": 0.9792616859979955, "grad_norm": 2.430007219314575, "learning_rate": 1.1269899528178474e-08, "loss": 0.7355, "step": 27724 }, { "epoch": 0.9792970078017034, "grad_norm": 1.865071415901184, "learning_rate": 1.1231548310565721e-08, "loss": 0.7532, "step": 27725 }, { "epoch": 0.9793323296054113, "grad_norm": 1.6391851902008057, "learning_rate": 1.1193262385016946e-08, "loss": 0.759, "step": 27726 }, { "epoch": 0.9793676514091192, "grad_norm": 1.6947882175445557, "learning_rate": 1.1155041752032858e-08, "loss": 0.7736, "step": 27727 }, { "epoch": 0.9794029732128271, "grad_norm": 2.1586222648620605, "learning_rate": 1.1116886412113615e-08, "loss": 0.766, "step": 27728 }, { "epoch": 0.979438295016535, "grad_norm": 1.8754487037658691, "learning_rate": 1.1078796365758815e-08, "loss": 0.7831, "step": 27729 }, { "epoch": 0.9794736168202429, "grad_norm": 1.6873949766159058, "learning_rate": 1.104077161346695e-08, "loss": 0.7568, "step": 27730 }, { "epoch": 0.9795089386239508, "grad_norm": 0.9448885321617126, "learning_rate": 1.1002812155735953e-08, "loss": 0.5832, "step": 27731 }, { "epoch": 0.9795442604276587, "grad_norm": 1.7483307123184204, "learning_rate": 1.0964917993062097e-08, "loss": 0.7222, "step": 27732 }, { "epoch": 0.9795795822313667, "grad_norm": 1.5562952756881714, "learning_rate": 1.0927089125941093e-08, "loss": 0.7756, "step": 27733 }, { "epoch": 0.9796149040350746, "grad_norm": 1.5677175521850586, "learning_rate": 1.0889325554869213e-08, "loss": 0.7426, "step": 27734 }, { "epoch": 0.9796502258387825, "grad_norm": 1.5406975746154785, "learning_rate": 1.0851627280339395e-08, "loss": 0.7618, "step": 27735 }, { "epoch": 0.9796855476424904, "grad_norm": 1.638945460319519, "learning_rate": 1.0813994302846242e-08, "loss": 0.7542, "step": 27736 }, { "epoch": 0.9797208694461983, "grad_norm": 1.8895957469940186, "learning_rate": 1.077642662288103e-08, "loss": 0.7728, "step": 27737 }, { "epoch": 0.9797561912499062, "grad_norm": 1.6413805484771729, "learning_rate": 1.0738924240936699e-08, "loss": 0.7308, "step": 27738 }, { "epoch": 0.9797915130536141, "grad_norm": 1.774232268333435, "learning_rate": 1.0701487157502854e-08, "loss": 0.7767, "step": 27739 }, { "epoch": 0.979826834857322, "grad_norm": 1.7372899055480957, "learning_rate": 1.0664115373070216e-08, "loss": 0.798, "step": 27740 }, { "epoch": 0.9798621566610299, "grad_norm": 1.9461588859558105, "learning_rate": 1.0626808888127837e-08, "loss": 0.751, "step": 27741 }, { "epoch": 0.9798974784647378, "grad_norm": 1.6294888257980347, "learning_rate": 1.0589567703164216e-08, "loss": 0.7497, "step": 27742 }, { "epoch": 0.9799328002684458, "grad_norm": 0.9983759522438049, "learning_rate": 1.0552391818666186e-08, "loss": 0.5764, "step": 27743 }, { "epoch": 0.9799681220721537, "grad_norm": 1.6602917909622192, "learning_rate": 1.0515281235120023e-08, "loss": 0.7624, "step": 27744 }, { "epoch": 0.9800034438758615, "grad_norm": 1.867335557937622, "learning_rate": 1.0478235953012562e-08, "loss": 0.7442, "step": 27745 }, { "epoch": 0.9800387656795694, "grad_norm": 1.7074460983276367, "learning_rate": 1.0441255972827858e-08, "loss": 0.7381, "step": 27746 }, { "epoch": 0.9800740874832773, "grad_norm": 1.7458680868148804, "learning_rate": 1.0404341295049969e-08, "loss": 0.8028, "step": 27747 }, { "epoch": 0.9801094092869852, "grad_norm": 1.824558973312378, "learning_rate": 1.0367491920162397e-08, "loss": 0.8091, "step": 27748 }, { "epoch": 0.9801447310906931, "grad_norm": 1.8436704874038696, "learning_rate": 1.0330707848646981e-08, "loss": 0.7384, "step": 27749 }, { "epoch": 0.980180052894401, "grad_norm": 1.7622991800308228, "learning_rate": 1.0293989080985555e-08, "loss": 0.7538, "step": 27750 }, { "epoch": 0.9802153746981089, "grad_norm": 2.4890618324279785, "learning_rate": 1.0257335617657738e-08, "loss": 0.7491, "step": 27751 }, { "epoch": 0.9802506965018168, "grad_norm": 2.018425464630127, "learning_rate": 1.0220747459144808e-08, "loss": 0.76, "step": 27752 }, { "epoch": 0.9802860183055248, "grad_norm": 1.7101861238479614, "learning_rate": 1.0184224605924164e-08, "loss": 0.7448, "step": 27753 }, { "epoch": 0.9803213401092327, "grad_norm": 1.6333353519439697, "learning_rate": 1.0147767058474312e-08, "loss": 0.766, "step": 27754 }, { "epoch": 0.9803566619129406, "grad_norm": 1.896285891532898, "learning_rate": 1.0111374817272645e-08, "loss": 0.7795, "step": 27755 }, { "epoch": 0.9803919837166485, "grad_norm": 1.6386895179748535, "learning_rate": 1.0075047882795452e-08, "loss": 0.7382, "step": 27756 }, { "epoch": 0.9804273055203564, "grad_norm": 1.7857825756072998, "learning_rate": 1.0038786255517907e-08, "loss": 0.7509, "step": 27757 }, { "epoch": 0.9804626273240643, "grad_norm": 1.7611664533615112, "learning_rate": 1.0002589935915185e-08, "loss": 0.7579, "step": 27758 }, { "epoch": 0.9804979491277722, "grad_norm": 0.8881353139877319, "learning_rate": 9.966458924459688e-09, "loss": 0.5426, "step": 27759 }, { "epoch": 0.9805332709314801, "grad_norm": 1.6064214706420898, "learning_rate": 9.930393221626033e-09, "loss": 0.7707, "step": 27760 }, { "epoch": 0.980568592735188, "grad_norm": 1.6646854877471924, "learning_rate": 9.894392827884959e-09, "loss": 0.7421, "step": 27761 }, { "epoch": 0.980603914538896, "grad_norm": 1.790274739265442, "learning_rate": 9.85845774370775e-09, "loss": 0.771, "step": 27762 }, { "epoch": 0.9806392363426039, "grad_norm": 1.7011791467666626, "learning_rate": 9.822587969565146e-09, "loss": 0.7369, "step": 27763 }, { "epoch": 0.9806745581463118, "grad_norm": 1.550516963005066, "learning_rate": 9.786783505926766e-09, "loss": 0.7541, "step": 27764 }, { "epoch": 0.9807098799500197, "grad_norm": 1.5815006494522095, "learning_rate": 9.751044353260575e-09, "loss": 0.7546, "step": 27765 }, { "epoch": 0.9807452017537276, "grad_norm": 1.6632689237594604, "learning_rate": 9.715370512035083e-09, "loss": 0.7757, "step": 27766 }, { "epoch": 0.9807805235574355, "grad_norm": 1.7364943027496338, "learning_rate": 9.679761982716584e-09, "loss": 0.7795, "step": 27767 }, { "epoch": 0.9808158453611434, "grad_norm": 1.5860956907272339, "learning_rate": 9.644218765770818e-09, "loss": 0.7528, "step": 27768 }, { "epoch": 0.9808511671648513, "grad_norm": 1.589756965637207, "learning_rate": 9.608740861664078e-09, "loss": 0.7821, "step": 27769 }, { "epoch": 0.9808864889685592, "grad_norm": 1.6621723175048828, "learning_rate": 9.573328270859328e-09, "loss": 0.7725, "step": 27770 }, { "epoch": 0.980921810772267, "grad_norm": 1.8408892154693604, "learning_rate": 9.53798099382175e-09, "loss": 0.7608, "step": 27771 }, { "epoch": 0.9809571325759749, "grad_norm": 1.74583899974823, "learning_rate": 9.502699031012086e-09, "loss": 0.7562, "step": 27772 }, { "epoch": 0.9809924543796829, "grad_norm": 1.715512752532959, "learning_rate": 9.4674823828933e-09, "loss": 0.7877, "step": 27773 }, { "epoch": 0.9810277761833908, "grad_norm": 1.6798179149627686, "learning_rate": 9.432331049926136e-09, "loss": 0.7645, "step": 27774 }, { "epoch": 0.9810630979870987, "grad_norm": 1.7043081521987915, "learning_rate": 9.397245032570223e-09, "loss": 0.7585, "step": 27775 }, { "epoch": 0.9810984197908066, "grad_norm": 1.737225890159607, "learning_rate": 9.362224331285197e-09, "loss": 0.7999, "step": 27776 }, { "epoch": 0.9811337415945145, "grad_norm": 1.9736902713775635, "learning_rate": 9.327268946529023e-09, "loss": 0.7463, "step": 27777 }, { "epoch": 0.9811690633982224, "grad_norm": 1.8940211534500122, "learning_rate": 9.292378878759668e-09, "loss": 0.7424, "step": 27778 }, { "epoch": 0.9812043852019303, "grad_norm": 1.6588149070739746, "learning_rate": 9.257554128433433e-09, "loss": 0.7496, "step": 27779 }, { "epoch": 0.9812397070056382, "grad_norm": 1.787378191947937, "learning_rate": 9.22279469600662e-09, "loss": 0.7236, "step": 27780 }, { "epoch": 0.9812750288093461, "grad_norm": 1.6246010065078735, "learning_rate": 9.188100581932757e-09, "loss": 0.7293, "step": 27781 }, { "epoch": 0.981310350613054, "grad_norm": 1.6523722410202026, "learning_rate": 9.15347178666759e-09, "loss": 0.7346, "step": 27782 }, { "epoch": 0.981345672416762, "grad_norm": 1.6399939060211182, "learning_rate": 9.118908310663532e-09, "loss": 0.7638, "step": 27783 }, { "epoch": 0.9813809942204699, "grad_norm": 1.582476258277893, "learning_rate": 9.084410154372447e-09, "loss": 0.7761, "step": 27784 }, { "epoch": 0.9814163160241778, "grad_norm": 1.62203848361969, "learning_rate": 9.049977318247304e-09, "loss": 0.7755, "step": 27785 }, { "epoch": 0.9814516378278857, "grad_norm": 1.79131019115448, "learning_rate": 9.015609802737191e-09, "loss": 0.8042, "step": 27786 }, { "epoch": 0.9814869596315936, "grad_norm": 1.6997435092926025, "learning_rate": 8.981307608292854e-09, "loss": 0.7477, "step": 27787 }, { "epoch": 0.9815222814353015, "grad_norm": 1.7854654788970947, "learning_rate": 8.947070735362828e-09, "loss": 0.7462, "step": 27788 }, { "epoch": 0.9815576032390094, "grad_norm": 2.848971128463745, "learning_rate": 8.912899184395641e-09, "loss": 0.7681, "step": 27789 }, { "epoch": 0.9815929250427173, "grad_norm": 1.7977612018585205, "learning_rate": 8.878792955837601e-09, "loss": 0.7534, "step": 27790 }, { "epoch": 0.9816282468464252, "grad_norm": 1.7016750574111938, "learning_rate": 8.84475205013613e-09, "loss": 0.7748, "step": 27791 }, { "epoch": 0.9816635686501332, "grad_norm": 1.9846484661102295, "learning_rate": 8.810776467736426e-09, "loss": 0.8059, "step": 27792 }, { "epoch": 0.9816988904538411, "grad_norm": 1.864553451538086, "learning_rate": 8.776866209082579e-09, "loss": 0.8052, "step": 27793 }, { "epoch": 0.981734212257549, "grad_norm": 1.8186211585998535, "learning_rate": 8.743021274619235e-09, "loss": 0.7911, "step": 27794 }, { "epoch": 0.9817695340612569, "grad_norm": 1.6077898740768433, "learning_rate": 8.709241664788815e-09, "loss": 0.7283, "step": 27795 }, { "epoch": 0.9818048558649648, "grad_norm": 1.816145896911621, "learning_rate": 8.675527380033744e-09, "loss": 0.7781, "step": 27796 }, { "epoch": 0.9818401776686726, "grad_norm": 1.6915010213851929, "learning_rate": 8.641878420794781e-09, "loss": 0.7566, "step": 27797 }, { "epoch": 0.9818754994723805, "grad_norm": 1.6612145900726318, "learning_rate": 8.608294787512683e-09, "loss": 0.762, "step": 27798 }, { "epoch": 0.9819108212760884, "grad_norm": 1.642275333404541, "learning_rate": 8.5747764806271e-09, "loss": 0.7483, "step": 27799 }, { "epoch": 0.9819461430797963, "grad_norm": 1.5812119245529175, "learning_rate": 8.54132350057657e-09, "loss": 0.751, "step": 27800 }, { "epoch": 0.9819814648835042, "grad_norm": 2.046130895614624, "learning_rate": 8.507935847799075e-09, "loss": 0.7781, "step": 27801 }, { "epoch": 0.9820167866872122, "grad_norm": 1.5990239381790161, "learning_rate": 8.474613522730934e-09, "loss": 0.7422, "step": 27802 }, { "epoch": 0.9820521084909201, "grad_norm": 1.5713800191879272, "learning_rate": 8.44135652580902e-09, "loss": 0.7078, "step": 27803 }, { "epoch": 0.982087430294628, "grad_norm": 1.9485101699829102, "learning_rate": 8.408164857468536e-09, "loss": 0.7789, "step": 27804 }, { "epoch": 0.9821227520983359, "grad_norm": 1.594159483909607, "learning_rate": 8.375038518143586e-09, "loss": 0.7427, "step": 27805 }, { "epoch": 0.9821580739020438, "grad_norm": 1.726514220237732, "learning_rate": 8.341977508267707e-09, "loss": 0.7355, "step": 27806 }, { "epoch": 0.9821933957057517, "grad_norm": 1.6319711208343506, "learning_rate": 8.308981828273888e-09, "loss": 0.7399, "step": 27807 }, { "epoch": 0.9822287175094596, "grad_norm": 1.7572314739227295, "learning_rate": 8.27605147859345e-09, "loss": 0.7898, "step": 27808 }, { "epoch": 0.9822640393131675, "grad_norm": 1.679963231086731, "learning_rate": 8.243186459657715e-09, "loss": 0.7489, "step": 27809 }, { "epoch": 0.9822993611168754, "grad_norm": 1.6127674579620361, "learning_rate": 8.210386771896894e-09, "loss": 0.7178, "step": 27810 }, { "epoch": 0.9823346829205833, "grad_norm": 1.7568213939666748, "learning_rate": 8.17765241574009e-09, "loss": 0.7834, "step": 27811 }, { "epoch": 0.9823700047242913, "grad_norm": 1.5915933847427368, "learning_rate": 8.144983391615845e-09, "loss": 0.7677, "step": 27812 }, { "epoch": 0.9824053265279992, "grad_norm": 1.6330749988555908, "learning_rate": 8.112379699952155e-09, "loss": 0.7975, "step": 27813 }, { "epoch": 0.9824406483317071, "grad_norm": 1.9702117443084717, "learning_rate": 8.079841341174787e-09, "loss": 0.7711, "step": 27814 }, { "epoch": 0.982475970135415, "grad_norm": 1.753214716911316, "learning_rate": 8.04736831571007e-09, "loss": 0.7834, "step": 27815 }, { "epoch": 0.9825112919391229, "grad_norm": 1.851484775543213, "learning_rate": 8.014960623983215e-09, "loss": 0.7783, "step": 27816 }, { "epoch": 0.9825466137428308, "grad_norm": 1.7677425146102905, "learning_rate": 7.98261826641833e-09, "loss": 0.7393, "step": 27817 }, { "epoch": 0.9825819355465387, "grad_norm": 1.5698027610778809, "learning_rate": 7.950341243437854e-09, "loss": 0.7573, "step": 27818 }, { "epoch": 0.9826172573502466, "grad_norm": 1.7728151082992554, "learning_rate": 7.918129555465891e-09, "loss": 0.7693, "step": 27819 }, { "epoch": 0.9826525791539545, "grad_norm": 1.7068085670471191, "learning_rate": 7.88598320292211e-09, "loss": 0.7304, "step": 27820 }, { "epoch": 0.9826879009576625, "grad_norm": 1.6995714902877808, "learning_rate": 7.85390218622839e-09, "loss": 0.7632, "step": 27821 }, { "epoch": 0.9827232227613704, "grad_norm": 1.7736647129058838, "learning_rate": 7.821886505804954e-09, "loss": 0.7743, "step": 27822 }, { "epoch": 0.9827585445650782, "grad_norm": 1.6884742975234985, "learning_rate": 7.7899361620698e-09, "loss": 0.7322, "step": 27823 }, { "epoch": 0.9827938663687861, "grad_norm": 1.6678017377853394, "learning_rate": 7.758051155441482e-09, "loss": 0.7473, "step": 27824 }, { "epoch": 0.982829188172494, "grad_norm": 1.6477371454238892, "learning_rate": 7.726231486337443e-09, "loss": 0.748, "step": 27825 }, { "epoch": 0.9828645099762019, "grad_norm": 1.9668787717819214, "learning_rate": 7.694477155174018e-09, "loss": 0.7732, "step": 27826 }, { "epoch": 0.9828998317799098, "grad_norm": 3.065199851989746, "learning_rate": 7.662788162366985e-09, "loss": 0.7495, "step": 27827 }, { "epoch": 0.9829351535836177, "grad_norm": 1.7694284915924072, "learning_rate": 7.631164508330458e-09, "loss": 0.7805, "step": 27828 }, { "epoch": 0.9829704753873256, "grad_norm": 1.8338533639907837, "learning_rate": 7.599606193479104e-09, "loss": 0.7879, "step": 27829 }, { "epoch": 0.9830057971910335, "grad_norm": 1.7426677942276, "learning_rate": 7.568113218225926e-09, "loss": 0.7512, "step": 27830 }, { "epoch": 0.9830411189947414, "grad_norm": 1.5682348012924194, "learning_rate": 7.536685582982262e-09, "loss": 0.7409, "step": 27831 }, { "epoch": 0.9830764407984494, "grad_norm": 1.9557586908340454, "learning_rate": 7.50532328816056e-09, "loss": 0.7682, "step": 27832 }, { "epoch": 0.9831117626021573, "grad_norm": 1.8109803199768066, "learning_rate": 7.474026334169937e-09, "loss": 0.7576, "step": 27833 }, { "epoch": 0.9831470844058652, "grad_norm": 1.853281855583191, "learning_rate": 7.442794721421176e-09, "loss": 0.7507, "step": 27834 }, { "epoch": 0.9831824062095731, "grad_norm": 1.6318063735961914, "learning_rate": 7.411628450322839e-09, "loss": 0.7514, "step": 27835 }, { "epoch": 0.983217728013281, "grad_norm": 2.576641321182251, "learning_rate": 7.380527521282377e-09, "loss": 0.7547, "step": 27836 }, { "epoch": 0.9832530498169889, "grad_norm": 1.6821209192276, "learning_rate": 7.349491934707243e-09, "loss": 0.768, "step": 27837 }, { "epoch": 0.9832883716206968, "grad_norm": 3.105994462966919, "learning_rate": 7.318521691003222e-09, "loss": 0.7554, "step": 27838 }, { "epoch": 0.9833236934244047, "grad_norm": 1.8759925365447998, "learning_rate": 7.287616790576102e-09, "loss": 0.7509, "step": 27839 }, { "epoch": 0.9833590152281126, "grad_norm": 1.649703025817871, "learning_rate": 7.256777233829448e-09, "loss": 0.7951, "step": 27840 }, { "epoch": 0.9833943370318206, "grad_norm": 1.6865047216415405, "learning_rate": 7.226003021168493e-09, "loss": 0.7864, "step": 27841 }, { "epoch": 0.9834296588355285, "grad_norm": 1.5050361156463623, "learning_rate": 7.195294152994581e-09, "loss": 0.7491, "step": 27842 }, { "epoch": 0.9834649806392364, "grad_norm": 1.7197387218475342, "learning_rate": 7.164650629710168e-09, "loss": 0.7664, "step": 27843 }, { "epoch": 0.9835003024429443, "grad_norm": 1.5894967317581177, "learning_rate": 7.134072451716045e-09, "loss": 0.7512, "step": 27844 }, { "epoch": 0.9835356242466522, "grad_norm": 1.6829652786254883, "learning_rate": 7.103559619413003e-09, "loss": 0.7604, "step": 27845 }, { "epoch": 0.9835709460503601, "grad_norm": 1.5432840585708618, "learning_rate": 7.073112133200166e-09, "loss": 0.733, "step": 27846 }, { "epoch": 0.983606267854068, "grad_norm": 1.7572567462921143, "learning_rate": 7.04272999347555e-09, "loss": 0.7596, "step": 27847 }, { "epoch": 0.9836415896577759, "grad_norm": 1.6500002145767212, "learning_rate": 7.012413200637169e-09, "loss": 0.8003, "step": 27848 }, { "epoch": 0.9836769114614837, "grad_norm": 1.5828381776809692, "learning_rate": 6.982161755081373e-09, "loss": 0.7744, "step": 27849 }, { "epoch": 0.9837122332651916, "grad_norm": 1.9669352769851685, "learning_rate": 6.951975657205068e-09, "loss": 0.7457, "step": 27850 }, { "epoch": 0.9837475550688995, "grad_norm": 2.025081157684326, "learning_rate": 6.921854907402381e-09, "loss": 0.7645, "step": 27851 }, { "epoch": 0.9837828768726075, "grad_norm": 2.1971638202667236, "learning_rate": 6.891799506067998e-09, "loss": 0.7703, "step": 27852 }, { "epoch": 0.9838181986763154, "grad_norm": 1.6904420852661133, "learning_rate": 6.861809453594936e-09, "loss": 0.7375, "step": 27853 }, { "epoch": 0.9838535204800233, "grad_norm": 1.5854698419570923, "learning_rate": 6.831884750375662e-09, "loss": 0.7586, "step": 27854 }, { "epoch": 0.9838888422837312, "grad_norm": 1.653253197669983, "learning_rate": 6.802025396802636e-09, "loss": 0.7502, "step": 27855 }, { "epoch": 0.9839241640874391, "grad_norm": 1.7400895357131958, "learning_rate": 6.772231393266104e-09, "loss": 0.7077, "step": 27856 }, { "epoch": 0.983959485891147, "grad_norm": 1.8071643114089966, "learning_rate": 6.742502740155754e-09, "loss": 0.7598, "step": 27857 }, { "epoch": 0.9839948076948549, "grad_norm": 1.6010780334472656, "learning_rate": 6.712839437860719e-09, "loss": 0.775, "step": 27858 }, { "epoch": 0.9840301294985628, "grad_norm": 1.573774814605713, "learning_rate": 6.683241486769576e-09, "loss": 0.7312, "step": 27859 }, { "epoch": 0.9840654513022707, "grad_norm": 1.679039716720581, "learning_rate": 6.653708887269794e-09, "loss": 0.7961, "step": 27860 }, { "epoch": 0.9841007731059787, "grad_norm": 1.6828588247299194, "learning_rate": 6.624241639747175e-09, "loss": 0.7691, "step": 27861 }, { "epoch": 0.9841360949096866, "grad_norm": 1.676491379737854, "learning_rate": 6.594839744588077e-09, "loss": 0.7892, "step": 27862 }, { "epoch": 0.9841714167133945, "grad_norm": 1.6661404371261597, "learning_rate": 6.565503202177193e-09, "loss": 0.7637, "step": 27863 }, { "epoch": 0.9842067385171024, "grad_norm": 1.954100489616394, "learning_rate": 6.5362320128981025e-09, "loss": 0.7811, "step": 27864 }, { "epoch": 0.9842420603208103, "grad_norm": 1.7167904376983643, "learning_rate": 6.507026177133835e-09, "loss": 0.7509, "step": 27865 }, { "epoch": 0.9842773821245182, "grad_norm": 2.4407715797424316, "learning_rate": 6.477885695267416e-09, "loss": 0.7619, "step": 27866 }, { "epoch": 0.9843127039282261, "grad_norm": 2.4610090255737305, "learning_rate": 6.448810567679653e-09, "loss": 0.7632, "step": 27867 }, { "epoch": 0.984348025731934, "grad_norm": 1.620130181312561, "learning_rate": 6.419800794751352e-09, "loss": 0.7355, "step": 27868 }, { "epoch": 0.9843833475356419, "grad_norm": 1.8156665563583374, "learning_rate": 6.390856376861654e-09, "loss": 0.7902, "step": 27869 }, { "epoch": 0.9844186693393498, "grad_norm": 2.506211757659912, "learning_rate": 6.3619773143897005e-09, "loss": 0.7762, "step": 27870 }, { "epoch": 0.9844539911430578, "grad_norm": 1.6108535528182983, "learning_rate": 6.333163607714077e-09, "loss": 0.7635, "step": 27871 }, { "epoch": 0.9844893129467657, "grad_norm": 3.4045064449310303, "learning_rate": 6.304415257210594e-09, "loss": 0.7657, "step": 27872 }, { "epoch": 0.9845246347504736, "grad_norm": 3.4361886978149414, "learning_rate": 6.275732263256728e-09, "loss": 0.7793, "step": 27873 }, { "epoch": 0.9845599565541815, "grad_norm": 1.8174002170562744, "learning_rate": 6.24711462622718e-09, "loss": 0.7778, "step": 27874 }, { "epoch": 0.9845952783578893, "grad_norm": 2.2172939777374268, "learning_rate": 6.218562346496093e-09, "loss": 0.7852, "step": 27875 }, { "epoch": 0.9846306001615972, "grad_norm": 1.8560024499893188, "learning_rate": 6.190075424438724e-09, "loss": 0.7616, "step": 27876 }, { "epoch": 0.9846659219653051, "grad_norm": 1.5447331666946411, "learning_rate": 6.161653860426442e-09, "loss": 0.768, "step": 27877 }, { "epoch": 0.984701243769013, "grad_norm": 1.7056889533996582, "learning_rate": 6.1332976548317265e-09, "loss": 0.7913, "step": 27878 }, { "epoch": 0.9847365655727209, "grad_norm": 3.0640194416046143, "learning_rate": 6.1050068080253936e-09, "loss": 0.753, "step": 27879 }, { "epoch": 0.9847718873764288, "grad_norm": 1.7865231037139893, "learning_rate": 6.0767813203782555e-09, "loss": 0.7304, "step": 27880 }, { "epoch": 0.9848072091801368, "grad_norm": 1.5692996978759766, "learning_rate": 6.048621192259463e-09, "loss": 0.7156, "step": 27881 }, { "epoch": 0.9848425309838447, "grad_norm": 1.8574814796447754, "learning_rate": 6.020526424037609e-09, "loss": 0.7316, "step": 27882 }, { "epoch": 0.9848778527875526, "grad_norm": 1.706032395362854, "learning_rate": 5.992497016080179e-09, "loss": 0.7809, "step": 27883 }, { "epoch": 0.9849131745912605, "grad_norm": 1.6018033027648926, "learning_rate": 5.964532968754655e-09, "loss": 0.7387, "step": 27884 }, { "epoch": 0.9849484963949684, "grad_norm": 1.6064910888671875, "learning_rate": 5.936634282425746e-09, "loss": 0.7379, "step": 27885 }, { "epoch": 0.9849838181986763, "grad_norm": 1.9416662454605103, "learning_rate": 5.908800957459826e-09, "loss": 0.7624, "step": 27886 }, { "epoch": 0.9850191400023842, "grad_norm": 1.9164488315582275, "learning_rate": 5.881032994221048e-09, "loss": 0.8118, "step": 27887 }, { "epoch": 0.9850544618060921, "grad_norm": 2.2022321224212646, "learning_rate": 5.853330393071899e-09, "loss": 0.77, "step": 27888 }, { "epoch": 0.9850897836098, "grad_norm": 1.7607306241989136, "learning_rate": 5.8256931543759775e-09, "loss": 0.7863, "step": 27889 }, { "epoch": 0.985125105413508, "grad_norm": 1.6572299003601074, "learning_rate": 5.798121278494106e-09, "loss": 0.7936, "step": 27890 }, { "epoch": 0.9851604272172159, "grad_norm": 1.704599380493164, "learning_rate": 5.770614765788218e-09, "loss": 0.7637, "step": 27891 }, { "epoch": 0.9851957490209238, "grad_norm": 1.6559727191925049, "learning_rate": 5.74317361661747e-09, "loss": 0.7646, "step": 27892 }, { "epoch": 0.9852310708246317, "grad_norm": 1.6459420919418335, "learning_rate": 5.715797831341019e-09, "loss": 0.7167, "step": 27893 }, { "epoch": 0.9852663926283396, "grad_norm": 1.787559151649475, "learning_rate": 5.688487410317467e-09, "loss": 0.7609, "step": 27894 }, { "epoch": 0.9853017144320475, "grad_norm": 1.8387541770935059, "learning_rate": 5.661242353904306e-09, "loss": 0.7725, "step": 27895 }, { "epoch": 0.9853370362357554, "grad_norm": 1.7173216342926025, "learning_rate": 5.634062662457918e-09, "loss": 0.7842, "step": 27896 }, { "epoch": 0.9853723580394633, "grad_norm": 1.7119250297546387, "learning_rate": 5.606948336334683e-09, "loss": 0.7766, "step": 27897 }, { "epoch": 0.9854076798431712, "grad_norm": 2.0720555782318115, "learning_rate": 5.579899375888209e-09, "loss": 0.7539, "step": 27898 }, { "epoch": 0.9854430016468791, "grad_norm": 2.9987120628356934, "learning_rate": 5.552915781473211e-09, "loss": 0.7641, "step": 27899 }, { "epoch": 0.9854783234505871, "grad_norm": 1.7554713487625122, "learning_rate": 5.525997553442742e-09, "loss": 0.7707, "step": 27900 }, { "epoch": 0.9855136452542949, "grad_norm": 1.8729896545410156, "learning_rate": 5.499144692149294e-09, "loss": 0.7781, "step": 27901 }, { "epoch": 0.9855489670580028, "grad_norm": 1.7037482261657715, "learning_rate": 5.472357197943701e-09, "loss": 0.7661, "step": 27902 }, { "epoch": 0.9855842888617107, "grad_norm": 1.7783304452896118, "learning_rate": 5.4456350711779015e-09, "loss": 0.7518, "step": 27903 }, { "epoch": 0.9856196106654186, "grad_norm": 1.8045414686203003, "learning_rate": 5.418978312199952e-09, "loss": 0.7499, "step": 27904 }, { "epoch": 0.9856549324691265, "grad_norm": 1.6098111867904663, "learning_rate": 5.392386921360127e-09, "loss": 0.7421, "step": 27905 }, { "epoch": 0.9856902542728344, "grad_norm": 1.7106937170028687, "learning_rate": 5.365860899005371e-09, "loss": 0.7574, "step": 27906 }, { "epoch": 0.9857255760765423, "grad_norm": 0.9560928344726562, "learning_rate": 5.339400245483739e-09, "loss": 0.5728, "step": 27907 }, { "epoch": 0.9857608978802502, "grad_norm": 1.6197932958602905, "learning_rate": 5.313004961141066e-09, "loss": 0.7297, "step": 27908 }, { "epoch": 0.9857962196839581, "grad_norm": 1.9918711185455322, "learning_rate": 5.2866750463226315e-09, "loss": 0.7654, "step": 27909 }, { "epoch": 0.985831541487666, "grad_norm": 2.582744836807251, "learning_rate": 5.2604105013737135e-09, "loss": 0.7077, "step": 27910 }, { "epoch": 0.985866863291374, "grad_norm": 1.596467137336731, "learning_rate": 5.2342113266373725e-09, "loss": 0.7799, "step": 27911 }, { "epoch": 0.9859021850950819, "grad_norm": 2.528055191040039, "learning_rate": 5.2080775224566675e-09, "loss": 0.7663, "step": 27912 }, { "epoch": 0.9859375068987898, "grad_norm": 1.9453214406967163, "learning_rate": 5.1820090891741e-09, "loss": 0.7618, "step": 27913 }, { "epoch": 0.9859728287024977, "grad_norm": 1.7217049598693848, "learning_rate": 5.156006027129956e-09, "loss": 0.8059, "step": 27914 }, { "epoch": 0.9860081505062056, "grad_norm": 1.830426573753357, "learning_rate": 5.130068336665628e-09, "loss": 0.7484, "step": 27915 }, { "epoch": 0.9860434723099135, "grad_norm": 1.615100622177124, "learning_rate": 5.104196018119734e-09, "loss": 0.7384, "step": 27916 }, { "epoch": 0.9860787941136214, "grad_norm": 1.7620844841003418, "learning_rate": 5.078389071830891e-09, "loss": 0.7546, "step": 27917 }, { "epoch": 0.9861141159173293, "grad_norm": 1.6322550773620605, "learning_rate": 5.0526474981377194e-09, "loss": 0.7259, "step": 27918 }, { "epoch": 0.9861494377210372, "grad_norm": 1.7757171392440796, "learning_rate": 5.02697129737606e-09, "loss": 0.7908, "step": 27919 }, { "epoch": 0.9861847595247452, "grad_norm": 1.5178412199020386, "learning_rate": 5.0013604698828654e-09, "loss": 0.7477, "step": 27920 }, { "epoch": 0.9862200813284531, "grad_norm": 1.6567151546478271, "learning_rate": 4.975815015992869e-09, "loss": 0.8007, "step": 27921 }, { "epoch": 0.986255403132161, "grad_norm": 1.9865220785140991, "learning_rate": 4.950334936040247e-09, "loss": 0.7734, "step": 27922 }, { "epoch": 0.9862907249358689, "grad_norm": 0.9164531230926514, "learning_rate": 4.924920230358621e-09, "loss": 0.5631, "step": 27923 }, { "epoch": 0.9863260467395768, "grad_norm": 1.662635326385498, "learning_rate": 4.899570899280503e-09, "loss": 0.7437, "step": 27924 }, { "epoch": 0.9863613685432847, "grad_norm": 1.9164777994155884, "learning_rate": 4.874286943138407e-09, "loss": 0.7606, "step": 27925 }, { "epoch": 0.9863966903469926, "grad_norm": 2.0535151958465576, "learning_rate": 4.849068362262066e-09, "loss": 0.747, "step": 27926 }, { "epoch": 0.9864320121507004, "grad_norm": 1.7721939086914062, "learning_rate": 4.8239151569828835e-09, "loss": 0.767, "step": 27927 }, { "epoch": 0.9864673339544083, "grad_norm": 1.5537476539611816, "learning_rate": 4.798827327628375e-09, "loss": 0.7659, "step": 27928 }, { "epoch": 0.9865026557581162, "grad_norm": 1.8020684719085693, "learning_rate": 4.773804874528276e-09, "loss": 0.7306, "step": 27929 }, { "epoch": 0.9865379775618242, "grad_norm": 1.820454716682434, "learning_rate": 4.748847798009548e-09, "loss": 0.7343, "step": 27930 }, { "epoch": 0.9865732993655321, "grad_norm": 1.6506975889205933, "learning_rate": 4.7239560983991515e-09, "loss": 0.741, "step": 27931 }, { "epoch": 0.98660862116924, "grad_norm": 1.619106650352478, "learning_rate": 4.699129776022382e-09, "loss": 0.7892, "step": 27932 }, { "epoch": 0.9866439429729479, "grad_norm": 1.8294368982315063, "learning_rate": 4.6743688312045344e-09, "loss": 0.7965, "step": 27933 }, { "epoch": 0.9866792647766558, "grad_norm": 1.5882928371429443, "learning_rate": 4.649673264269794e-09, "loss": 0.7727, "step": 27934 }, { "epoch": 0.9867145865803637, "grad_norm": 1.6392160654067993, "learning_rate": 4.625043075540681e-09, "loss": 0.7614, "step": 27935 }, { "epoch": 0.9867499083840716, "grad_norm": 1.8672914505004883, "learning_rate": 4.600478265339714e-09, "loss": 0.7654, "step": 27936 }, { "epoch": 0.9867852301877795, "grad_norm": 2.124241590499878, "learning_rate": 4.575978833988859e-09, "loss": 0.8005, "step": 27937 }, { "epoch": 0.9868205519914874, "grad_norm": 1.6222676038742065, "learning_rate": 4.5515447818089695e-09, "loss": 0.7874, "step": 27938 }, { "epoch": 0.9868558737951953, "grad_norm": 1.7243516445159912, "learning_rate": 4.527176109119236e-09, "loss": 0.7367, "step": 27939 }, { "epoch": 0.9868911955989033, "grad_norm": 1.6113377809524536, "learning_rate": 4.502872816238291e-09, "loss": 0.7185, "step": 27940 }, { "epoch": 0.9869265174026112, "grad_norm": 1.705933928489685, "learning_rate": 4.478634903485324e-09, "loss": 0.7668, "step": 27941 }, { "epoch": 0.9869618392063191, "grad_norm": 1.6578651666641235, "learning_rate": 4.454462371176748e-09, "loss": 0.7818, "step": 27942 }, { "epoch": 0.986997161010027, "grad_norm": 1.728913426399231, "learning_rate": 4.430355219628979e-09, "loss": 0.7579, "step": 27943 }, { "epoch": 0.9870324828137349, "grad_norm": 1.521574854850769, "learning_rate": 4.406313449157873e-09, "loss": 0.7524, "step": 27944 }, { "epoch": 0.9870678046174428, "grad_norm": 1.7658615112304688, "learning_rate": 4.382337060077624e-09, "loss": 0.7688, "step": 27945 }, { "epoch": 0.9871031264211507, "grad_norm": 1.818942904472351, "learning_rate": 4.358426052702425e-09, "loss": 0.7563, "step": 27946 }, { "epoch": 0.9871384482248586, "grad_norm": 1.0090868473052979, "learning_rate": 4.334580427344803e-09, "loss": 0.5334, "step": 27947 }, { "epoch": 0.9871737700285665, "grad_norm": 1.6207926273345947, "learning_rate": 4.310800184317287e-09, "loss": 0.7456, "step": 27948 }, { "epoch": 0.9872090918322745, "grad_norm": 1.817153811454773, "learning_rate": 4.28708532393074e-09, "loss": 0.7806, "step": 27949 }, { "epoch": 0.9872444136359824, "grad_norm": 1.8155839443206787, "learning_rate": 4.263435846496022e-09, "loss": 0.7751, "step": 27950 }, { "epoch": 0.9872797354396903, "grad_norm": 1.663045883178711, "learning_rate": 4.239851752322333e-09, "loss": 0.7814, "step": 27951 }, { "epoch": 0.9873150572433982, "grad_norm": 1.7889796495437622, "learning_rate": 4.216333041718312e-09, "loss": 0.7551, "step": 27952 }, { "epoch": 0.987350379047106, "grad_norm": 1.7421151399612427, "learning_rate": 4.1928797149920485e-09, "loss": 0.7854, "step": 27953 }, { "epoch": 0.9873857008508139, "grad_norm": 1.8111305236816406, "learning_rate": 4.169491772449963e-09, "loss": 0.7747, "step": 27954 }, { "epoch": 0.9874210226545218, "grad_norm": 1.8149389028549194, "learning_rate": 4.146169214398477e-09, "loss": 0.8034, "step": 27955 }, { "epoch": 0.9874563444582297, "grad_norm": 1.8208377361297607, "learning_rate": 4.1229120411429016e-09, "loss": 0.7658, "step": 27956 }, { "epoch": 0.9874916662619376, "grad_norm": 1.7686165571212769, "learning_rate": 4.099720252987438e-09, "loss": 0.7545, "step": 27957 }, { "epoch": 0.9875269880656455, "grad_norm": 1.6458746194839478, "learning_rate": 4.076593850235733e-09, "loss": 0.7364, "step": 27958 }, { "epoch": 0.9875623098693534, "grad_norm": 1.6656569242477417, "learning_rate": 4.053532833190321e-09, "loss": 0.7562, "step": 27959 }, { "epoch": 0.9875976316730614, "grad_norm": 1.921236276626587, "learning_rate": 4.030537202153184e-09, "loss": 0.7721, "step": 27960 }, { "epoch": 0.9876329534767693, "grad_norm": 1.7596769332885742, "learning_rate": 4.007606957425747e-09, "loss": 0.7913, "step": 27961 }, { "epoch": 0.9876682752804772, "grad_norm": 1.9627889394760132, "learning_rate": 3.984742099307215e-09, "loss": 0.7388, "step": 27962 }, { "epoch": 0.9877035970841851, "grad_norm": 1.517601728439331, "learning_rate": 3.9619426280967935e-09, "loss": 0.7173, "step": 27963 }, { "epoch": 0.987738918887893, "grad_norm": 0.8719956874847412, "learning_rate": 3.9392085440942415e-09, "loss": 0.5424, "step": 27964 }, { "epoch": 0.9877742406916009, "grad_norm": 1.7013472318649292, "learning_rate": 3.916539847595435e-09, "loss": 0.7607, "step": 27965 }, { "epoch": 0.9878095624953088, "grad_norm": 2.42864990234375, "learning_rate": 3.893936538897913e-09, "loss": 0.743, "step": 27966 }, { "epoch": 0.9878448842990167, "grad_norm": 1.7069522142410278, "learning_rate": 3.87139861829755e-09, "loss": 0.7363, "step": 27967 }, { "epoch": 0.9878802061027246, "grad_norm": 1.707055926322937, "learning_rate": 3.848926086089111e-09, "loss": 0.7949, "step": 27968 }, { "epoch": 0.9879155279064326, "grad_norm": 1.6692084074020386, "learning_rate": 3.8265189425668034e-09, "loss": 0.7578, "step": 27969 }, { "epoch": 0.9879508497101405, "grad_norm": 0.9643903374671936, "learning_rate": 3.8041771880237275e-09, "loss": 0.5825, "step": 27970 }, { "epoch": 0.9879861715138484, "grad_norm": 1.5212265253067017, "learning_rate": 3.781900822752427e-09, "loss": 0.7997, "step": 27971 }, { "epoch": 0.9880214933175563, "grad_norm": 2.2981839179992676, "learning_rate": 3.759689847044335e-09, "loss": 0.7243, "step": 27972 }, { "epoch": 0.9880568151212642, "grad_norm": 1.725265622138977, "learning_rate": 3.737544261190329e-09, "loss": 0.7509, "step": 27973 }, { "epoch": 0.9880921369249721, "grad_norm": 1.7480841875076294, "learning_rate": 3.715464065480179e-09, "loss": 0.7219, "step": 27974 }, { "epoch": 0.98812745872868, "grad_norm": 2.087442636489868, "learning_rate": 3.6934492602030967e-09, "loss": 0.7785, "step": 27975 }, { "epoch": 0.9881627805323879, "grad_norm": 1.5944734811782837, "learning_rate": 3.671499845646631e-09, "loss": 0.7299, "step": 27976 }, { "epoch": 0.9881981023360958, "grad_norm": 1.5655978918075562, "learning_rate": 3.649615822098329e-09, "loss": 0.7562, "step": 27977 }, { "epoch": 0.9882334241398038, "grad_norm": 1.6824262142181396, "learning_rate": 3.6277971898446285e-09, "loss": 0.7672, "step": 27978 }, { "epoch": 0.9882687459435116, "grad_norm": 1.7587943077087402, "learning_rate": 3.6060439491714116e-09, "loss": 0.8074, "step": 27979 }, { "epoch": 0.9883040677472195, "grad_norm": 1.9995259046554565, "learning_rate": 3.5843561003623405e-09, "loss": 0.7813, "step": 27980 }, { "epoch": 0.9883393895509274, "grad_norm": 0.908703088760376, "learning_rate": 3.5627336437027427e-09, "loss": 0.565, "step": 27981 }, { "epoch": 0.9883747113546353, "grad_norm": 1.7875666618347168, "learning_rate": 3.5411765794746146e-09, "loss": 0.761, "step": 27982 }, { "epoch": 0.9884100331583432, "grad_norm": 1.7463831901550293, "learning_rate": 3.5196849079605078e-09, "loss": 0.7361, "step": 27983 }, { "epoch": 0.9884453549620511, "grad_norm": 1.923548698425293, "learning_rate": 3.498258629441309e-09, "loss": 0.794, "step": 27984 }, { "epoch": 0.988480676765759, "grad_norm": 1.6519001722335815, "learning_rate": 3.4768977441979045e-09, "loss": 0.7907, "step": 27985 }, { "epoch": 0.9885159985694669, "grad_norm": 1.7641996145248413, "learning_rate": 3.4556022525089606e-09, "loss": 0.766, "step": 27986 }, { "epoch": 0.9885513203731748, "grad_norm": 1.749912142753601, "learning_rate": 3.4343721546548082e-09, "loss": 0.7613, "step": 27987 }, { "epoch": 0.9885866421768827, "grad_norm": 1.5218638181686401, "learning_rate": 3.413207450911893e-09, "loss": 0.7416, "step": 27988 }, { "epoch": 0.9886219639805907, "grad_norm": 1.8419138193130493, "learning_rate": 3.3921081415572157e-09, "loss": 0.7775, "step": 27989 }, { "epoch": 0.9886572857842986, "grad_norm": 1.6399211883544922, "learning_rate": 3.3710742268677764e-09, "loss": 0.7432, "step": 27990 }, { "epoch": 0.9886926075880065, "grad_norm": 1.6587750911712646, "learning_rate": 3.350105707118356e-09, "loss": 0.7565, "step": 27991 }, { "epoch": 0.9887279293917144, "grad_norm": 1.7783079147338867, "learning_rate": 3.329202582583735e-09, "loss": 0.7851, "step": 27992 }, { "epoch": 0.9887632511954223, "grad_norm": 1.6614817380905151, "learning_rate": 3.308364853537027e-09, "loss": 0.8064, "step": 27993 }, { "epoch": 0.9887985729991302, "grad_norm": 1.7518919706344604, "learning_rate": 3.2875925202513482e-09, "loss": 0.7509, "step": 27994 }, { "epoch": 0.9888338948028381, "grad_norm": 1.881592869758606, "learning_rate": 3.266885582998147e-09, "loss": 0.8144, "step": 27995 }, { "epoch": 0.988869216606546, "grad_norm": 1.6061588525772095, "learning_rate": 3.246244042048874e-09, "loss": 0.7343, "step": 27996 }, { "epoch": 0.9889045384102539, "grad_norm": 1.621097207069397, "learning_rate": 3.2256678976733124e-09, "loss": 0.7504, "step": 27997 }, { "epoch": 0.9889398602139619, "grad_norm": 1.6556065082550049, "learning_rate": 3.205157150140692e-09, "loss": 0.7709, "step": 27998 }, { "epoch": 0.9889751820176698, "grad_norm": 1.7389957904815674, "learning_rate": 3.1847117997202413e-09, "loss": 0.7579, "step": 27999 }, { "epoch": 0.9890105038213777, "grad_norm": 1.5644683837890625, "learning_rate": 3.1643318466789697e-09, "loss": 0.7268, "step": 28000 }, { "epoch": 0.9890458256250856, "grad_norm": 1.5528664588928223, "learning_rate": 3.14401729128333e-09, "loss": 0.7497, "step": 28001 }, { "epoch": 0.9890811474287935, "grad_norm": 1.9542405605316162, "learning_rate": 3.123768133799221e-09, "loss": 0.7783, "step": 28002 }, { "epoch": 0.9891164692325014, "grad_norm": 1.7157304286956787, "learning_rate": 3.1035843744919857e-09, "loss": 0.7469, "step": 28003 }, { "epoch": 0.9891517910362093, "grad_norm": 1.8044683933258057, "learning_rate": 3.083466013626413e-09, "loss": 0.7325, "step": 28004 }, { "epoch": 0.9891871128399171, "grad_norm": 1.938348412513733, "learning_rate": 3.0634130514645146e-09, "loss": 0.7468, "step": 28005 }, { "epoch": 0.989222434643625, "grad_norm": 1.6954305171966553, "learning_rate": 3.0434254882694137e-09, "loss": 0.7559, "step": 28006 }, { "epoch": 0.9892577564473329, "grad_norm": 1.6968517303466797, "learning_rate": 3.023503324302568e-09, "loss": 0.779, "step": 28007 }, { "epoch": 0.9892930782510408, "grad_norm": 1.9041157960891724, "learning_rate": 3.0036465598248797e-09, "loss": 0.7857, "step": 28008 }, { "epoch": 0.9893284000547488, "grad_norm": 3.457547187805176, "learning_rate": 2.9838551950961414e-09, "loss": 0.7464, "step": 28009 }, { "epoch": 0.9893637218584567, "grad_norm": 1.570529580116272, "learning_rate": 2.96412923037559e-09, "loss": 0.754, "step": 28010 }, { "epoch": 0.9893990436621646, "grad_norm": 1.619276523590088, "learning_rate": 2.944468665920797e-09, "loss": 0.734, "step": 28011 }, { "epoch": 0.9894343654658725, "grad_norm": 1.7421656847000122, "learning_rate": 2.9248735019898888e-09, "loss": 0.7475, "step": 28012 }, { "epoch": 0.9894696872695804, "grad_norm": 1.9622693061828613, "learning_rate": 2.9053437388387728e-09, "loss": 0.7578, "step": 28013 }, { "epoch": 0.9895050090732883, "grad_norm": 1.6955571174621582, "learning_rate": 2.8858793767228e-09, "loss": 0.7737, "step": 28014 }, { "epoch": 0.9895403308769962, "grad_norm": 1.708762526512146, "learning_rate": 2.8664804158973215e-09, "loss": 0.7771, "step": 28015 }, { "epoch": 0.9895756526807041, "grad_norm": 1.6530537605285645, "learning_rate": 2.8471468566160233e-09, "loss": 0.7801, "step": 28016 }, { "epoch": 0.989610974484412, "grad_norm": 1.7601268291473389, "learning_rate": 2.827878699132036e-09, "loss": 0.7618, "step": 28017 }, { "epoch": 0.98964629628812, "grad_norm": 1.5868487358093262, "learning_rate": 2.808675943697381e-09, "loss": 0.801, "step": 28018 }, { "epoch": 0.9896816180918279, "grad_norm": 1.6769616603851318, "learning_rate": 2.789538590562968e-09, "loss": 0.7501, "step": 28019 }, { "epoch": 0.9897169398955358, "grad_norm": 1.5829296112060547, "learning_rate": 2.770466639980263e-09, "loss": 0.7075, "step": 28020 }, { "epoch": 0.9897522616992437, "grad_norm": 1.6828614473342896, "learning_rate": 2.751460092197955e-09, "loss": 0.7548, "step": 28021 }, { "epoch": 0.9897875835029516, "grad_norm": 1.8379122018814087, "learning_rate": 2.7325189474652904e-09, "loss": 0.7378, "step": 28022 }, { "epoch": 0.9898229053066595, "grad_norm": 1.7200846672058105, "learning_rate": 2.713643206029848e-09, "loss": 0.7832, "step": 28023 }, { "epoch": 0.9898582271103674, "grad_norm": 1.6041840314865112, "learning_rate": 2.6948328681392076e-09, "loss": 0.7712, "step": 28024 }, { "epoch": 0.9898935489140753, "grad_norm": 1.628442645072937, "learning_rate": 2.6760879340387293e-09, "loss": 0.74, "step": 28025 }, { "epoch": 0.9899288707177832, "grad_norm": 1.7997246980667114, "learning_rate": 2.6574084039748813e-09, "loss": 0.7607, "step": 28026 }, { "epoch": 0.9899641925214911, "grad_norm": 4.275341987609863, "learning_rate": 2.638794278190804e-09, "loss": 0.7717, "step": 28027 }, { "epoch": 0.9899995143251991, "grad_norm": 1.599399447441101, "learning_rate": 2.6202455569313e-09, "loss": 0.7497, "step": 28028 }, { "epoch": 0.990034836128907, "grad_norm": 1.607956051826477, "learning_rate": 2.601762240438399e-09, "loss": 0.7414, "step": 28029 }, { "epoch": 0.9900701579326149, "grad_norm": 1.7375800609588623, "learning_rate": 2.5833443289541293e-09, "loss": 0.7621, "step": 28030 }, { "epoch": 0.9901054797363227, "grad_norm": 1.9364354610443115, "learning_rate": 2.5649918227194093e-09, "loss": 0.7237, "step": 28031 }, { "epoch": 0.9901408015400306, "grad_norm": 1.5345289707183838, "learning_rate": 2.5467047219751574e-09, "loss": 0.7386, "step": 28032 }, { "epoch": 0.9901761233437385, "grad_norm": 1.6250004768371582, "learning_rate": 2.528483026960071e-09, "loss": 0.7518, "step": 28033 }, { "epoch": 0.9902114451474464, "grad_norm": 1.9610615968704224, "learning_rate": 2.5103267379128495e-09, "loss": 0.738, "step": 28034 }, { "epoch": 0.9902467669511543, "grad_norm": 1.6619813442230225, "learning_rate": 2.492235855070524e-09, "loss": 0.7252, "step": 28035 }, { "epoch": 0.9902820887548622, "grad_norm": 1.661062479019165, "learning_rate": 2.474210378671238e-09, "loss": 0.7631, "step": 28036 }, { "epoch": 0.9903174105585701, "grad_norm": 1.7546546459197998, "learning_rate": 2.4562503089492486e-09, "loss": 0.7914, "step": 28037 }, { "epoch": 0.990352732362278, "grad_norm": 1.5854169130325317, "learning_rate": 2.438355646141033e-09, "loss": 0.762, "step": 28038 }, { "epoch": 0.990388054165986, "grad_norm": 1.754732370376587, "learning_rate": 2.4205263904797382e-09, "loss": 0.7741, "step": 28039 }, { "epoch": 0.9904233759696939, "grad_norm": 1.6990913152694702, "learning_rate": 2.4027625421996215e-09, "loss": 0.7394, "step": 28040 }, { "epoch": 0.9904586977734018, "grad_norm": 1.6573396921157837, "learning_rate": 2.385064101532164e-09, "loss": 0.7169, "step": 28041 }, { "epoch": 0.9904940195771097, "grad_norm": 1.868154525756836, "learning_rate": 2.3674310687099576e-09, "loss": 0.773, "step": 28042 }, { "epoch": 0.9905293413808176, "grad_norm": 1.6489561796188354, "learning_rate": 2.349863443962819e-09, "loss": 0.7658, "step": 28043 }, { "epoch": 0.9905646631845255, "grad_norm": 2.076235055923462, "learning_rate": 2.3323612275216733e-09, "loss": 0.7997, "step": 28044 }, { "epoch": 0.9905999849882334, "grad_norm": 1.5955228805541992, "learning_rate": 2.3149244196152278e-09, "loss": 0.7555, "step": 28045 }, { "epoch": 0.9906353067919413, "grad_norm": 1.6938743591308594, "learning_rate": 2.2975530204716323e-09, "loss": 0.7637, "step": 28046 }, { "epoch": 0.9906706285956492, "grad_norm": 1.9876829385757446, "learning_rate": 2.280247030317928e-09, "loss": 0.8031, "step": 28047 }, { "epoch": 0.9907059503993572, "grad_norm": 2.3788585662841797, "learning_rate": 2.2630064493806003e-09, "loss": 0.7994, "step": 28048 }, { "epoch": 0.9907412722030651, "grad_norm": 1.701830267906189, "learning_rate": 2.2458312778861348e-09, "loss": 0.7306, "step": 28049 }, { "epoch": 0.990776594006773, "grad_norm": 2.0201194286346436, "learning_rate": 2.228721516058241e-09, "loss": 0.7413, "step": 28050 }, { "epoch": 0.9908119158104809, "grad_norm": 1.7141449451446533, "learning_rate": 2.2116771641217394e-09, "loss": 0.7736, "step": 28051 }, { "epoch": 0.9908472376141888, "grad_norm": 2.143697500228882, "learning_rate": 2.19469822229923e-09, "loss": 0.7856, "step": 28052 }, { "epoch": 0.9908825594178967, "grad_norm": 1.5665967464447021, "learning_rate": 2.1777846908133116e-09, "loss": 0.7852, "step": 28053 }, { "epoch": 0.9909178812216046, "grad_norm": 1.878853678703308, "learning_rate": 2.1609365698843643e-09, "loss": 0.7515, "step": 28054 }, { "epoch": 0.9909532030253125, "grad_norm": 1.7908543348312378, "learning_rate": 2.1441538597344325e-09, "loss": 0.7254, "step": 28055 }, { "epoch": 0.9909885248290204, "grad_norm": 1.6727970838546753, "learning_rate": 2.127436560581675e-09, "loss": 0.7888, "step": 28056 }, { "epoch": 0.9910238466327284, "grad_norm": 1.9858670234680176, "learning_rate": 2.110784672645916e-09, "loss": 0.7594, "step": 28057 }, { "epoch": 0.9910591684364362, "grad_norm": 1.6467915773391724, "learning_rate": 2.0941981961442038e-09, "loss": 0.7549, "step": 28058 }, { "epoch": 0.9910944902401441, "grad_norm": 2.558258295059204, "learning_rate": 2.0776771312946973e-09, "loss": 0.7584, "step": 28059 }, { "epoch": 0.991129812043852, "grad_norm": 1.628506064414978, "learning_rate": 2.0612214783127803e-09, "loss": 0.7924, "step": 28060 }, { "epoch": 0.9911651338475599, "grad_norm": 1.7447565793991089, "learning_rate": 2.0448312374138355e-09, "loss": 0.7723, "step": 28061 }, { "epoch": 0.9912004556512678, "grad_norm": 1.6014379262924194, "learning_rate": 2.028506408813247e-09, "loss": 0.7423, "step": 28062 }, { "epoch": 0.9912357774549757, "grad_norm": 1.7502378225326538, "learning_rate": 2.012246992723621e-09, "loss": 0.7672, "step": 28063 }, { "epoch": 0.9912710992586836, "grad_norm": 1.6407493352890015, "learning_rate": 1.9960529893581217e-09, "loss": 0.7629, "step": 28064 }, { "epoch": 0.9913064210623915, "grad_norm": 1.7879472970962524, "learning_rate": 1.9799243989288008e-09, "loss": 0.7465, "step": 28065 }, { "epoch": 0.9913417428660994, "grad_norm": 2.0049431324005127, "learning_rate": 1.9638612216471564e-09, "loss": 0.7532, "step": 28066 }, { "epoch": 0.9913770646698074, "grad_norm": 1.7897794246673584, "learning_rate": 1.9478634577224653e-09, "loss": 0.812, "step": 28067 }, { "epoch": 0.9914123864735153, "grad_norm": 1.7315196990966797, "learning_rate": 1.9319311073645597e-09, "loss": 0.7538, "step": 28068 }, { "epoch": 0.9914477082772232, "grad_norm": 1.8388848304748535, "learning_rate": 1.9160641707821616e-09, "loss": 0.767, "step": 28069 }, { "epoch": 0.9914830300809311, "grad_norm": 4.5423903465271, "learning_rate": 1.900262648182882e-09, "loss": 0.75, "step": 28070 }, { "epoch": 0.991518351884639, "grad_norm": 2.0393190383911133, "learning_rate": 1.8845265397737787e-09, "loss": 0.772, "step": 28071 }, { "epoch": 0.9915536736883469, "grad_norm": 1.7420405149459839, "learning_rate": 1.8688558457596873e-09, "loss": 0.7623, "step": 28072 }, { "epoch": 0.9915889954920548, "grad_norm": 1.6474494934082031, "learning_rate": 1.853250566347109e-09, "loss": 0.7666, "step": 28073 }, { "epoch": 0.9916243172957627, "grad_norm": 1.6894830465316772, "learning_rate": 1.837710701739215e-09, "loss": 0.7954, "step": 28074 }, { "epoch": 0.9916596390994706, "grad_norm": 1.757453441619873, "learning_rate": 1.8222362521397307e-09, "loss": 0.7555, "step": 28075 }, { "epoch": 0.9916949609031785, "grad_norm": 1.7167242765426636, "learning_rate": 1.8068272177518276e-09, "loss": 0.772, "step": 28076 }, { "epoch": 0.9917302827068865, "grad_norm": 1.8083195686340332, "learning_rate": 1.7914835987759005e-09, "loss": 0.7914, "step": 28077 }, { "epoch": 0.9917656045105944, "grad_norm": 1.6275910139083862, "learning_rate": 1.7762053954140102e-09, "loss": 0.7526, "step": 28078 }, { "epoch": 0.9918009263143023, "grad_norm": 1.6053274869918823, "learning_rate": 1.7609926078654416e-09, "loss": 0.7794, "step": 28079 }, { "epoch": 0.9918362481180102, "grad_norm": 1.6757702827453613, "learning_rate": 1.7458452363294797e-09, "loss": 0.7146, "step": 28080 }, { "epoch": 0.9918715699217181, "grad_norm": 1.7877204418182373, "learning_rate": 1.7307632810042995e-09, "loss": 0.8024, "step": 28081 }, { "epoch": 0.991906891725426, "grad_norm": 1.5751163959503174, "learning_rate": 1.7157467420869655e-09, "loss": 0.739, "step": 28082 }, { "epoch": 0.9919422135291339, "grad_norm": 1.7086340188980103, "learning_rate": 1.7007956197745423e-09, "loss": 0.7411, "step": 28083 }, { "epoch": 0.9919775353328417, "grad_norm": 1.6975809335708618, "learning_rate": 1.6859099142629843e-09, "loss": 0.7694, "step": 28084 }, { "epoch": 0.9920128571365496, "grad_norm": 1.6964168548583984, "learning_rate": 1.6710896257460253e-09, "loss": 0.757, "step": 28085 }, { "epoch": 0.9920481789402575, "grad_norm": 1.623241901397705, "learning_rate": 1.6563347544185094e-09, "loss": 0.7676, "step": 28086 }, { "epoch": 0.9920835007439655, "grad_norm": 1.8142163753509521, "learning_rate": 1.6416453004736155e-09, "loss": 0.7771, "step": 28087 }, { "epoch": 0.9921188225476734, "grad_norm": 1.7668389081954956, "learning_rate": 1.627021264102857e-09, "loss": 0.7764, "step": 28088 }, { "epoch": 0.9921541443513813, "grad_norm": 2.8616538047790527, "learning_rate": 1.6124626454983028e-09, "loss": 0.7991, "step": 28089 }, { "epoch": 0.9921894661550892, "grad_norm": 1.928702712059021, "learning_rate": 1.5979694448503557e-09, "loss": 0.7963, "step": 28090 }, { "epoch": 0.9922247879587971, "grad_norm": 1.6531721353530884, "learning_rate": 1.5835416623483091e-09, "loss": 0.7684, "step": 28091 }, { "epoch": 0.992260109762505, "grad_norm": 1.716552972793579, "learning_rate": 1.5691792981809007e-09, "loss": 0.767, "step": 28092 }, { "epoch": 0.9922954315662129, "grad_norm": 2.3349416255950928, "learning_rate": 1.5548823525368685e-09, "loss": 0.7733, "step": 28093 }, { "epoch": 0.9923307533699208, "grad_norm": 1.7199496030807495, "learning_rate": 1.5406508256032848e-09, "loss": 0.7545, "step": 28094 }, { "epoch": 0.9923660751736287, "grad_norm": 1.6496661901474, "learning_rate": 1.5264847175655572e-09, "loss": 0.7537, "step": 28095 }, { "epoch": 0.9924013969773366, "grad_norm": 1.9373893737792969, "learning_rate": 1.512384028609648e-09, "loss": 0.7532, "step": 28096 }, { "epoch": 0.9924367187810446, "grad_norm": 1.8142205476760864, "learning_rate": 1.4983487589204093e-09, "loss": 0.748, "step": 28097 }, { "epoch": 0.9924720405847525, "grad_norm": 1.6642050743103027, "learning_rate": 1.484378908681028e-09, "loss": 0.7703, "step": 28098 }, { "epoch": 0.9925073623884604, "grad_norm": 1.6600325107574463, "learning_rate": 1.4704744780741354e-09, "loss": 0.7667, "step": 28099 }, { "epoch": 0.9925426841921683, "grad_norm": 1.588674545288086, "learning_rate": 1.4566354672823636e-09, "loss": 0.7553, "step": 28100 }, { "epoch": 0.9925780059958762, "grad_norm": 1.8205738067626953, "learning_rate": 1.4428618764866786e-09, "loss": 0.776, "step": 28101 }, { "epoch": 0.9926133277995841, "grad_norm": 1.4879176616668701, "learning_rate": 1.429153705866937e-09, "loss": 0.7269, "step": 28102 }, { "epoch": 0.992648649603292, "grad_norm": 1.9585201740264893, "learning_rate": 1.4155109556024394e-09, "loss": 0.7614, "step": 28103 }, { "epoch": 0.9926839714069999, "grad_norm": 1.6856601238250732, "learning_rate": 1.401933625872487e-09, "loss": 0.7485, "step": 28104 }, { "epoch": 0.9927192932107078, "grad_norm": 1.7317713499069214, "learning_rate": 1.3884217168547155e-09, "loss": 0.7476, "step": 28105 }, { "epoch": 0.9927546150144158, "grad_norm": 1.6446008682250977, "learning_rate": 1.374975228725095e-09, "loss": 0.7241, "step": 28106 }, { "epoch": 0.9927899368181237, "grad_norm": 1.885359525680542, "learning_rate": 1.3615941616607065e-09, "loss": 0.7908, "step": 28107 }, { "epoch": 0.9928252586218316, "grad_norm": 2.53853702545166, "learning_rate": 1.3482785158358548e-09, "loss": 0.7504, "step": 28108 }, { "epoch": 0.9928605804255395, "grad_norm": 2.5039641857147217, "learning_rate": 1.3350282914248448e-09, "loss": 0.7901, "step": 28109 }, { "epoch": 0.9928959022292473, "grad_norm": 1.9092411994934082, "learning_rate": 1.3218434886014265e-09, "loss": 0.7275, "step": 28110 }, { "epoch": 0.9929312240329552, "grad_norm": 1.636737585067749, "learning_rate": 1.3087241075382395e-09, "loss": 0.7486, "step": 28111 }, { "epoch": 0.9929665458366631, "grad_norm": 1.6545945405960083, "learning_rate": 1.2956701484068135e-09, "loss": 0.7267, "step": 28112 }, { "epoch": 0.993001867640371, "grad_norm": 1.6186468601226807, "learning_rate": 1.2826816113781227e-09, "loss": 0.7336, "step": 28113 }, { "epoch": 0.9930371894440789, "grad_norm": 3.852926015853882, "learning_rate": 1.269758496621476e-09, "loss": 0.7525, "step": 28114 }, { "epoch": 0.9930725112477868, "grad_norm": 1.9753230810165405, "learning_rate": 1.2569008043072927e-09, "loss": 0.7852, "step": 28115 }, { "epoch": 0.9931078330514947, "grad_norm": 1.6387165784835815, "learning_rate": 1.2441085346026617e-09, "loss": 0.7576, "step": 28116 }, { "epoch": 0.9931431548552027, "grad_norm": 2.37270450592041, "learning_rate": 1.2313816876757811e-09, "loss": 0.7469, "step": 28117 }, { "epoch": 0.9931784766589106, "grad_norm": 2.2880759239196777, "learning_rate": 1.2187202636926298e-09, "loss": 0.7501, "step": 28118 }, { "epoch": 0.9932137984626185, "grad_norm": 1.8304444551467896, "learning_rate": 1.2061242628191861e-09, "loss": 0.7463, "step": 28119 }, { "epoch": 0.9932491202663264, "grad_norm": 1.9941556453704834, "learning_rate": 1.193593685220873e-09, "loss": 0.7929, "step": 28120 }, { "epoch": 0.9932844420700343, "grad_norm": 1.7059885263442993, "learning_rate": 1.1811285310603381e-09, "loss": 0.7421, "step": 28121 }, { "epoch": 0.9933197638737422, "grad_norm": 1.6839648485183716, "learning_rate": 1.168728800501895e-09, "loss": 0.7277, "step": 28122 }, { "epoch": 0.9933550856774501, "grad_norm": 1.8084174394607544, "learning_rate": 1.1563944937076354e-09, "loss": 0.7424, "step": 28123 }, { "epoch": 0.993390407481158, "grad_norm": 2.4467053413391113, "learning_rate": 1.1441256108385423e-09, "loss": 0.7713, "step": 28124 }, { "epoch": 0.9934257292848659, "grad_norm": 1.8115348815917969, "learning_rate": 1.1319221520555978e-09, "loss": 0.7366, "step": 28125 }, { "epoch": 0.9934610510885739, "grad_norm": 1.758263349533081, "learning_rate": 1.119784117518119e-09, "loss": 0.7828, "step": 28126 }, { "epoch": 0.9934963728922818, "grad_norm": 1.705989122390747, "learning_rate": 1.1077115073854227e-09, "loss": 0.7798, "step": 28127 }, { "epoch": 0.9935316946959897, "grad_norm": 1.740695595741272, "learning_rate": 1.0957043218157159e-09, "loss": 0.7775, "step": 28128 }, { "epoch": 0.9935670164996976, "grad_norm": 1.6526159048080444, "learning_rate": 1.083762560964985e-09, "loss": 0.772, "step": 28129 }, { "epoch": 0.9936023383034055, "grad_norm": 1.8011858463287354, "learning_rate": 1.0718862249908813e-09, "loss": 0.7622, "step": 28130 }, { "epoch": 0.9936376601071134, "grad_norm": 1.5773131847381592, "learning_rate": 1.060075314047726e-09, "loss": 0.7395, "step": 28131 }, { "epoch": 0.9936729819108213, "grad_norm": 1.58427095413208, "learning_rate": 1.0483298282909505e-09, "loss": 0.7344, "step": 28132 }, { "epoch": 0.9937083037145292, "grad_norm": 1.874588131904602, "learning_rate": 1.0366497678737653e-09, "loss": 0.7633, "step": 28133 }, { "epoch": 0.9937436255182371, "grad_norm": 3.1799943447113037, "learning_rate": 1.0250351329493814e-09, "loss": 0.7826, "step": 28134 }, { "epoch": 0.993778947321945, "grad_norm": 1.6876899003982544, "learning_rate": 1.013485923669344e-09, "loss": 0.7178, "step": 28135 }, { "epoch": 0.9938142691256528, "grad_norm": 1.7504907846450806, "learning_rate": 1.0020021401851988e-09, "loss": 0.752, "step": 28136 }, { "epoch": 0.9938495909293608, "grad_norm": 1.643807053565979, "learning_rate": 9.905837826473808e-10, "loss": 0.7596, "step": 28137 }, { "epoch": 0.9938849127330687, "grad_norm": 1.7020398378372192, "learning_rate": 9.7923085120466e-10, "loss": 0.7585, "step": 28138 }, { "epoch": 0.9939202345367766, "grad_norm": 1.8265936374664307, "learning_rate": 9.679433460063614e-10, "loss": 0.8082, "step": 28139 }, { "epoch": 0.9939555563404845, "grad_norm": 1.6885408163070679, "learning_rate": 9.567212672001446e-10, "loss": 0.7658, "step": 28140 }, { "epoch": 0.9939908781441924, "grad_norm": 1.7273222208023071, "learning_rate": 9.45564614932004e-10, "loss": 0.7101, "step": 28141 }, { "epoch": 0.9940261999479003, "grad_norm": 2.035787582397461, "learning_rate": 9.344733893490442e-10, "loss": 0.7547, "step": 28142 }, { "epoch": 0.9940615217516082, "grad_norm": 1.6605778932571411, "learning_rate": 9.234475905961493e-10, "loss": 0.7228, "step": 28143 }, { "epoch": 0.9940968435553161, "grad_norm": 1.8199820518493652, "learning_rate": 9.12487218817093e-10, "loss": 0.7689, "step": 28144 }, { "epoch": 0.994132165359024, "grad_norm": 1.774369478225708, "learning_rate": 9.015922741556493e-10, "loss": 0.7871, "step": 28145 }, { "epoch": 0.994167487162732, "grad_norm": 3.2291598320007324, "learning_rate": 8.907627567544818e-10, "loss": 0.7605, "step": 28146 }, { "epoch": 0.9942028089664399, "grad_norm": 1.973546028137207, "learning_rate": 8.799986667556992e-10, "loss": 0.7825, "step": 28147 }, { "epoch": 0.9942381307701478, "grad_norm": 1.5877398252487183, "learning_rate": 8.693000042991894e-10, "loss": 0.7965, "step": 28148 }, { "epoch": 0.9942734525738557, "grad_norm": 1.5502395629882812, "learning_rate": 8.586667695259509e-10, "loss": 0.7686, "step": 28149 }, { "epoch": 0.9943087743775636, "grad_norm": 1.6466352939605713, "learning_rate": 8.480989625742065e-10, "loss": 0.7647, "step": 28150 }, { "epoch": 0.9943440961812715, "grad_norm": 1.7293920516967773, "learning_rate": 8.375965835832889e-10, "loss": 0.7551, "step": 28151 }, { "epoch": 0.9943794179849794, "grad_norm": 1.9636485576629639, "learning_rate": 8.271596326903108e-10, "loss": 0.7615, "step": 28152 }, { "epoch": 0.9944147397886873, "grad_norm": 1.9469079971313477, "learning_rate": 8.167881100312747e-10, "loss": 0.7465, "step": 28153 }, { "epoch": 0.9944500615923952, "grad_norm": 1.7127748727798462, "learning_rate": 8.064820157427377e-10, "loss": 0.7433, "step": 28154 }, { "epoch": 0.9944853833961032, "grad_norm": 1.7649954557418823, "learning_rate": 7.962413499590371e-10, "loss": 0.7596, "step": 28155 }, { "epoch": 0.9945207051998111, "grad_norm": 1.7007956504821777, "learning_rate": 7.860661128139546e-10, "loss": 0.7826, "step": 28156 }, { "epoch": 0.994556027003519, "grad_norm": 1.622491717338562, "learning_rate": 7.759563044418273e-10, "loss": 0.7764, "step": 28157 }, { "epoch": 0.9945913488072269, "grad_norm": 1.578321933746338, "learning_rate": 7.659119249736613e-10, "loss": 0.7299, "step": 28158 }, { "epoch": 0.9946266706109348, "grad_norm": 1.6735904216766357, "learning_rate": 7.559329745421285e-10, "loss": 0.7607, "step": 28159 }, { "epoch": 0.9946619924146427, "grad_norm": 1.6195439100265503, "learning_rate": 7.460194532765696e-10, "loss": 0.7445, "step": 28160 }, { "epoch": 0.9946973142183506, "grad_norm": 1.8695943355560303, "learning_rate": 7.361713613074362e-10, "loss": 0.7507, "step": 28161 }, { "epoch": 0.9947326360220584, "grad_norm": 1.5434365272521973, "learning_rate": 7.263886987640689e-10, "loss": 0.7473, "step": 28162 }, { "epoch": 0.9947679578257663, "grad_norm": 1.597313404083252, "learning_rate": 7.166714657735885e-10, "loss": 0.7308, "step": 28163 }, { "epoch": 0.9948032796294742, "grad_norm": 1.601318597793579, "learning_rate": 7.070196624631154e-10, "loss": 0.7173, "step": 28164 }, { "epoch": 0.9948386014331821, "grad_norm": 1.4679720401763916, "learning_rate": 6.974332889597701e-10, "loss": 0.7244, "step": 28165 }, { "epoch": 0.9948739232368901, "grad_norm": 2.3392064571380615, "learning_rate": 6.87912345389008e-10, "loss": 0.8183, "step": 28166 }, { "epoch": 0.994909245040598, "grad_norm": 1.4030888080596924, "learning_rate": 6.784568318746187e-10, "loss": 0.7522, "step": 28167 }, { "epoch": 0.9949445668443059, "grad_norm": 1.7450178861618042, "learning_rate": 6.690667485409474e-10, "loss": 0.7877, "step": 28168 }, { "epoch": 0.9949798886480138, "grad_norm": 1.5868195295333862, "learning_rate": 6.597420955112288e-10, "loss": 0.7391, "step": 28169 }, { "epoch": 0.9950152104517217, "grad_norm": 2.7694942951202393, "learning_rate": 6.504828729064771e-10, "loss": 0.8043, "step": 28170 }, { "epoch": 0.9950505322554296, "grad_norm": 1.590232491493225, "learning_rate": 6.412890808482619e-10, "loss": 0.7457, "step": 28171 }, { "epoch": 0.9950858540591375, "grad_norm": 1.638363003730774, "learning_rate": 6.321607194575974e-10, "loss": 0.7787, "step": 28172 }, { "epoch": 0.9951211758628454, "grad_norm": 1.7837865352630615, "learning_rate": 6.230977888532775e-10, "loss": 0.7527, "step": 28173 }, { "epoch": 0.9951564976665533, "grad_norm": 1.7012327909469604, "learning_rate": 6.141002891540959e-10, "loss": 0.7533, "step": 28174 }, { "epoch": 0.9951918194702613, "grad_norm": 2.2569973468780518, "learning_rate": 6.051682204777365e-10, "loss": 0.7892, "step": 28175 }, { "epoch": 0.9952271412739692, "grad_norm": 1.712789535522461, "learning_rate": 5.963015829407726e-10, "loss": 0.7875, "step": 28176 }, { "epoch": 0.9952624630776771, "grad_norm": 1.7493312358856201, "learning_rate": 5.875003766597775e-10, "loss": 0.7681, "step": 28177 }, { "epoch": 0.995297784881385, "grad_norm": 0.8893473148345947, "learning_rate": 5.787646017502147e-10, "loss": 0.555, "step": 28178 }, { "epoch": 0.9953331066850929, "grad_norm": 2.0639865398406982, "learning_rate": 5.700942583258818e-10, "loss": 0.7697, "step": 28179 }, { "epoch": 0.9953684284888008, "grad_norm": 1.6754155158996582, "learning_rate": 5.614893465000215e-10, "loss": 0.753, "step": 28180 }, { "epoch": 0.9954037502925087, "grad_norm": 1.574934482574463, "learning_rate": 5.529498663858768e-10, "loss": 0.7883, "step": 28181 }, { "epoch": 0.9954390720962166, "grad_norm": 1.5371733903884888, "learning_rate": 5.444758180950249e-10, "loss": 0.7324, "step": 28182 }, { "epoch": 0.9954743938999245, "grad_norm": 1.7847379446029663, "learning_rate": 5.360672017379332e-10, "loss": 0.804, "step": 28183 }, { "epoch": 0.9955097157036324, "grad_norm": 1.7947020530700684, "learning_rate": 5.277240174250686e-10, "loss": 0.7811, "step": 28184 }, { "epoch": 0.9955450375073404, "grad_norm": 1.7773669958114624, "learning_rate": 5.194462652657883e-10, "loss": 0.7823, "step": 28185 }, { "epoch": 0.9955803593110483, "grad_norm": 1.600818157196045, "learning_rate": 5.112339453683391e-10, "loss": 0.7404, "step": 28186 }, { "epoch": 0.9956156811147562, "grad_norm": 2.0705482959747314, "learning_rate": 5.030870578398572e-10, "loss": 0.7749, "step": 28187 }, { "epoch": 0.995651002918464, "grad_norm": 1.6533453464508057, "learning_rate": 4.950056027869244e-10, "loss": 0.7758, "step": 28188 }, { "epoch": 0.9956863247221719, "grad_norm": 1.8435118198394775, "learning_rate": 4.869895803155667e-10, "loss": 0.7416, "step": 28189 }, { "epoch": 0.9957216465258798, "grad_norm": 1.9209026098251343, "learning_rate": 4.790389905312554e-10, "loss": 0.7658, "step": 28190 }, { "epoch": 0.9957569683295877, "grad_norm": 1.864528775215149, "learning_rate": 4.711538335366861e-10, "loss": 0.7564, "step": 28191 }, { "epoch": 0.9957922901332956, "grad_norm": 1.6922773122787476, "learning_rate": 4.633341094362198e-10, "loss": 0.7571, "step": 28192 }, { "epoch": 0.9958276119370035, "grad_norm": 1.9191328287124634, "learning_rate": 4.5557981833199703e-10, "loss": 0.7366, "step": 28193 }, { "epoch": 0.9958629337407114, "grad_norm": 1.6468234062194824, "learning_rate": 4.478909603250481e-10, "loss": 0.769, "step": 28194 }, { "epoch": 0.9958982555444194, "grad_norm": 1.757465124130249, "learning_rate": 4.402675355164032e-10, "loss": 0.7557, "step": 28195 }, { "epoch": 0.9959335773481273, "grad_norm": 1.7526564598083496, "learning_rate": 4.327095440059825e-10, "loss": 0.7519, "step": 28196 }, { "epoch": 0.9959688991518352, "grad_norm": 1.9147175550460815, "learning_rate": 4.2521698589204074e-10, "loss": 0.7688, "step": 28197 }, { "epoch": 0.9960042209555431, "grad_norm": 1.7532458305358887, "learning_rate": 4.1778986127338774e-10, "loss": 0.7565, "step": 28198 }, { "epoch": 0.996039542759251, "grad_norm": 1.734725832939148, "learning_rate": 4.1042817024661285e-10, "loss": 0.7679, "step": 28199 }, { "epoch": 0.9960748645629589, "grad_norm": 1.5753885507583618, "learning_rate": 4.031319129083056e-10, "loss": 0.7757, "step": 28200 }, { "epoch": 0.9961101863666668, "grad_norm": 1.7474433183670044, "learning_rate": 3.959010893545001e-10, "loss": 0.8039, "step": 28201 }, { "epoch": 0.9961455081703747, "grad_norm": 1.6722644567489624, "learning_rate": 3.8873569967845527e-10, "loss": 0.7411, "step": 28202 }, { "epoch": 0.9961808299740826, "grad_norm": 1.6213243007659912, "learning_rate": 3.816357439756502e-10, "loss": 0.7259, "step": 28203 }, { "epoch": 0.9962161517777905, "grad_norm": 1.833177924156189, "learning_rate": 3.7460122233767826e-10, "loss": 0.7501, "step": 28204 }, { "epoch": 0.9962514735814985, "grad_norm": 1.6314685344696045, "learning_rate": 3.6763213485724315e-10, "loss": 0.7803, "step": 28205 }, { "epoch": 0.9962867953852064, "grad_norm": 1.6265389919281006, "learning_rate": 3.607284816253831e-10, "loss": 0.7362, "step": 28206 }, { "epoch": 0.9963221171889143, "grad_norm": 1.5983933210372925, "learning_rate": 3.538902627325813e-10, "loss": 0.7442, "step": 28207 }, { "epoch": 0.9963574389926222, "grad_norm": 1.697511076927185, "learning_rate": 3.471174782682107e-10, "loss": 0.7627, "step": 28208 }, { "epoch": 0.9963927607963301, "grad_norm": 1.703381061553955, "learning_rate": 3.4041012832053413e-10, "loss": 0.7524, "step": 28209 }, { "epoch": 0.996428082600038, "grad_norm": 1.5955208539962769, "learning_rate": 3.337682129783693e-10, "loss": 0.7747, "step": 28210 }, { "epoch": 0.9964634044037459, "grad_norm": 1.63602614402771, "learning_rate": 3.2719173232775845e-10, "loss": 0.7803, "step": 28211 }, { "epoch": 0.9964987262074538, "grad_norm": 1.672472596168518, "learning_rate": 3.2068068645474406e-10, "loss": 0.7415, "step": 28212 }, { "epoch": 0.9965340480111617, "grad_norm": 1.835396647453308, "learning_rate": 3.1423507544536823e-10, "loss": 0.7379, "step": 28213 }, { "epoch": 0.9965693698148695, "grad_norm": 1.6080219745635986, "learning_rate": 3.078548993828978e-10, "loss": 0.7391, "step": 28214 }, { "epoch": 0.9966046916185775, "grad_norm": 1.8653780221939087, "learning_rate": 3.015401583517097e-10, "loss": 0.7609, "step": 28215 }, { "epoch": 0.9966400134222854, "grad_norm": 1.9728991985321045, "learning_rate": 2.952908524339604e-10, "loss": 0.7688, "step": 28216 }, { "epoch": 0.9966753352259933, "grad_norm": 1.7386507987976074, "learning_rate": 2.891069817118064e-10, "loss": 0.7976, "step": 28217 }, { "epoch": 0.9967106570297012, "grad_norm": 1.6352607011795044, "learning_rate": 2.829885462657389e-10, "loss": 0.7647, "step": 28218 }, { "epoch": 0.9967459788334091, "grad_norm": 1.7953225374221802, "learning_rate": 2.7693554617624906e-10, "loss": 0.754, "step": 28219 }, { "epoch": 0.996781300637117, "grad_norm": 1.6268185377120972, "learning_rate": 2.7094798152216275e-10, "loss": 0.7279, "step": 28220 }, { "epoch": 0.9968166224408249, "grad_norm": 1.7159775495529175, "learning_rate": 2.6502585238230574e-10, "loss": 0.766, "step": 28221 }, { "epoch": 0.9968519442445328, "grad_norm": 1.474948525428772, "learning_rate": 2.591691588338385e-10, "loss": 0.7132, "step": 28222 }, { "epoch": 0.9968872660482407, "grad_norm": 2.536407470703125, "learning_rate": 2.5337790095336656e-10, "loss": 0.7908, "step": 28223 }, { "epoch": 0.9969225878519486, "grad_norm": 41.5110969543457, "learning_rate": 2.476520788169401e-10, "loss": 0.766, "step": 28224 }, { "epoch": 0.9969579096556566, "grad_norm": 2.7182886600494385, "learning_rate": 2.4199169249949914e-10, "loss": 0.7687, "step": 28225 }, { "epoch": 0.9969932314593645, "grad_norm": 1.693512201309204, "learning_rate": 2.363967420748736e-10, "loss": 0.746, "step": 28226 }, { "epoch": 0.9970285532630724, "grad_norm": 1.6500468254089355, "learning_rate": 2.308672276168933e-10, "loss": 0.7766, "step": 28227 }, { "epoch": 0.9970638750667803, "grad_norm": 1.5910396575927734, "learning_rate": 2.2540314919716754e-10, "loss": 0.7433, "step": 28228 }, { "epoch": 0.9970991968704882, "grad_norm": 1.6045513153076172, "learning_rate": 2.2000450688730578e-10, "loss": 0.7478, "step": 28229 }, { "epoch": 0.9971345186741961, "grad_norm": 2.029860019683838, "learning_rate": 2.1467130075836228e-10, "loss": 0.7442, "step": 28230 }, { "epoch": 0.997169840477904, "grad_norm": 1.6476634740829468, "learning_rate": 2.0940353087972597e-10, "loss": 0.7236, "step": 28231 }, { "epoch": 0.9972051622816119, "grad_norm": 1.6513988971710205, "learning_rate": 2.042011973207858e-10, "loss": 0.7303, "step": 28232 }, { "epoch": 0.9972404840853198, "grad_norm": 12.214149475097656, "learning_rate": 1.9906430014926537e-10, "loss": 0.7466, "step": 28233 }, { "epoch": 0.9972758058890278, "grad_norm": 1.7412444353103638, "learning_rate": 1.9399283943233317e-10, "loss": 0.7803, "step": 28234 }, { "epoch": 0.9973111276927357, "grad_norm": 2.063448667526245, "learning_rate": 1.8898681523715768e-10, "loss": 0.7214, "step": 28235 }, { "epoch": 0.9973464494964436, "grad_norm": 1.6979118585586548, "learning_rate": 1.8404622762813185e-10, "loss": 0.7398, "step": 28236 }, { "epoch": 0.9973817713001515, "grad_norm": 1.7648628950119019, "learning_rate": 1.7917107667075883e-10, "loss": 0.7512, "step": 28237 }, { "epoch": 0.9974170931038594, "grad_norm": 1.6959322690963745, "learning_rate": 1.7436136242832136e-10, "loss": 0.7463, "step": 28238 }, { "epoch": 0.9974524149075673, "grad_norm": 1.7639834880828857, "learning_rate": 1.6961708496410211e-10, "loss": 0.7349, "step": 28239 }, { "epoch": 0.9974877367112751, "grad_norm": 1.6484718322753906, "learning_rate": 1.6493824434027361e-10, "loss": 0.7415, "step": 28240 }, { "epoch": 0.997523058514983, "grad_norm": 0.9334971904754639, "learning_rate": 1.6032484061734298e-10, "loss": 0.5697, "step": 28241 }, { "epoch": 0.9975583803186909, "grad_norm": 1.6943774223327637, "learning_rate": 1.5577687385692763e-10, "loss": 0.7921, "step": 28242 }, { "epoch": 0.9975937021223988, "grad_norm": 1.5698362588882446, "learning_rate": 1.5129434411731425e-10, "loss": 0.7425, "step": 28243 }, { "epoch": 0.9976290239261068, "grad_norm": 1.9905009269714355, "learning_rate": 1.4687725145789977e-10, "loss": 0.7608, "step": 28244 }, { "epoch": 0.9976643457298147, "grad_norm": 1.606274962425232, "learning_rate": 1.425255959358607e-10, "loss": 0.7314, "step": 28245 }, { "epoch": 0.9976996675335226, "grad_norm": 1.6178008317947388, "learning_rate": 1.3823937760892857e-10, "loss": 0.7801, "step": 28246 }, { "epoch": 0.9977349893372305, "grad_norm": 1.6886889934539795, "learning_rate": 1.3401859653316974e-10, "loss": 0.7518, "step": 28247 }, { "epoch": 0.9977703111409384, "grad_norm": 1.7505861520767212, "learning_rate": 1.2986325276298506e-10, "loss": 0.7435, "step": 28248 }, { "epoch": 0.9978056329446463, "grad_norm": 1.908158302307129, "learning_rate": 1.2577334635333061e-10, "loss": 0.7503, "step": 28249 }, { "epoch": 0.9978409547483542, "grad_norm": 2.5293102264404297, "learning_rate": 1.217488773580522e-10, "loss": 0.7624, "step": 28250 }, { "epoch": 0.9978762765520621, "grad_norm": 1.8035558462142944, "learning_rate": 1.1778984582877518e-10, "loss": 0.7711, "step": 28251 }, { "epoch": 0.99791159835577, "grad_norm": 1.5979838371276855, "learning_rate": 1.1389625181879028e-10, "loss": 0.7883, "step": 28252 }, { "epoch": 0.997946920159478, "grad_norm": 1.772378921508789, "learning_rate": 1.100680953775024e-10, "loss": 0.7508, "step": 28253 }, { "epoch": 0.9979822419631859, "grad_norm": 1.656425952911377, "learning_rate": 1.0630537655653694e-10, "loss": 0.7705, "step": 28254 }, { "epoch": 0.9980175637668938, "grad_norm": 1.79521906375885, "learning_rate": 1.0260809540363348e-10, "loss": 0.7364, "step": 28255 }, { "epoch": 0.9980528855706017, "grad_norm": 1.7562822103500366, "learning_rate": 9.897625196819693e-11, "loss": 0.8094, "step": 28256 }, { "epoch": 0.9980882073743096, "grad_norm": 1.5612584352493286, "learning_rate": 9.54098462979669e-11, "loss": 0.7271, "step": 28257 }, { "epoch": 0.9981235291780175, "grad_norm": 1.854962944984436, "learning_rate": 9.190887843846252e-11, "loss": 0.7508, "step": 28258 }, { "epoch": 0.9981588509817254, "grad_norm": 1.5966750383377075, "learning_rate": 8.847334843631316e-11, "loss": 0.7506, "step": 28259 }, { "epoch": 0.9981941727854333, "grad_norm": 1.9831281900405884, "learning_rate": 8.510325633648287e-11, "loss": 0.736, "step": 28260 }, { "epoch": 0.9982294945891412, "grad_norm": 1.838888168334961, "learning_rate": 8.179860218282542e-11, "loss": 0.7439, "step": 28261 }, { "epoch": 0.9982648163928491, "grad_norm": 1.5828723907470703, "learning_rate": 7.855938601919467e-11, "loss": 0.7519, "step": 28262 }, { "epoch": 0.998300138196557, "grad_norm": 1.4983388185501099, "learning_rate": 7.538560788666882e-11, "loss": 0.7619, "step": 28263 }, { "epoch": 0.998335460000265, "grad_norm": 2.956355094909668, "learning_rate": 7.22772678279915e-11, "loss": 0.7693, "step": 28264 }, { "epoch": 0.9983707818039729, "grad_norm": 1.7427035570144653, "learning_rate": 6.923436588368582e-11, "loss": 0.7426, "step": 28265 }, { "epoch": 0.9984061036076807, "grad_norm": 1.6610255241394043, "learning_rate": 6.625690209316471e-11, "loss": 0.7671, "step": 28266 }, { "epoch": 0.9984414254113886, "grad_norm": 1.8024975061416626, "learning_rate": 6.33448764958411e-11, "loss": 0.7984, "step": 28267 }, { "epoch": 0.9984767472150965, "grad_norm": 1.6661977767944336, "learning_rate": 6.049828912946254e-11, "loss": 0.7358, "step": 28268 }, { "epoch": 0.9985120690188044, "grad_norm": 1.7160851955413818, "learning_rate": 5.771714003122153e-11, "loss": 0.7664, "step": 28269 }, { "epoch": 0.9985473908225123, "grad_norm": 1.7781486511230469, "learning_rate": 5.500142923775542e-11, "loss": 0.7865, "step": 28270 }, { "epoch": 0.9985827126262202, "grad_norm": 1.6357542276382446, "learning_rate": 5.2351156784036236e-11, "loss": 0.7458, "step": 28271 }, { "epoch": 0.9986180344299281, "grad_norm": 2.0014922618865967, "learning_rate": 4.976632270614623e-11, "loss": 0.7575, "step": 28272 }, { "epoch": 0.998653356233636, "grad_norm": 1.8374673128128052, "learning_rate": 4.724692703628186e-11, "loss": 0.7504, "step": 28273 }, { "epoch": 0.998688678037344, "grad_norm": 1.8887083530426025, "learning_rate": 4.479296980886005e-11, "loss": 0.7431, "step": 28274 }, { "epoch": 0.9987239998410519, "grad_norm": 1.7205398082733154, "learning_rate": 4.2404451054967044e-11, "loss": 0.7611, "step": 28275 }, { "epoch": 0.9987593216447598, "grad_norm": 1.6437424421310425, "learning_rate": 4.008137080624419e-11, "loss": 0.7646, "step": 28276 }, { "epoch": 0.9987946434484677, "grad_norm": 1.695355772972107, "learning_rate": 3.782372909266752e-11, "loss": 0.7625, "step": 28277 }, { "epoch": 0.9988299652521756, "grad_norm": 1.7412729263305664, "learning_rate": 3.5631525944768155e-11, "loss": 0.782, "step": 28278 }, { "epoch": 0.9988652870558835, "grad_norm": 1.6361652612686157, "learning_rate": 3.350476139030168e-11, "loss": 0.782, "step": 28279 }, { "epoch": 0.9989006088595914, "grad_norm": 1.7312731742858887, "learning_rate": 3.144343545757877e-11, "loss": 0.7568, "step": 28280 }, { "epoch": 0.9989359306632993, "grad_norm": 1.6548691987991333, "learning_rate": 2.94475481732448e-11, "loss": 0.72, "step": 28281 }, { "epoch": 0.9989712524670072, "grad_norm": 1.7734326124191284, "learning_rate": 2.7517099563389993e-11, "loss": 0.7337, "step": 28282 }, { "epoch": 0.9990065742707152, "grad_norm": 1.683258295059204, "learning_rate": 2.5652089654104596e-11, "loss": 0.7538, "step": 28283 }, { "epoch": 0.9990418960744231, "grad_norm": 1.6351042985916138, "learning_rate": 2.385251846870329e-11, "loss": 0.7512, "step": 28284 }, { "epoch": 0.999077217878131, "grad_norm": 1.666824221611023, "learning_rate": 2.2118386031610984e-11, "loss": 0.7173, "step": 28285 }, { "epoch": 0.9991125396818389, "grad_norm": 2.483992338180542, "learning_rate": 2.0449692364477024e-11, "loss": 0.7655, "step": 28286 }, { "epoch": 0.9991478614855468, "grad_norm": 2.0249457359313965, "learning_rate": 1.8846437490616098e-11, "loss": 0.7851, "step": 28287 }, { "epoch": 0.9991831832892547, "grad_norm": 1.650375247001648, "learning_rate": 1.7308621430012218e-11, "loss": 0.7592, "step": 28288 }, { "epoch": 0.9992185050929626, "grad_norm": 2.064789056777954, "learning_rate": 1.58362442026494e-11, "loss": 0.7468, "step": 28289 }, { "epoch": 0.9992538268966705, "grad_norm": 1.6025598049163818, "learning_rate": 1.4429305828511653e-11, "loss": 0.7665, "step": 28290 }, { "epoch": 0.9992891487003784, "grad_norm": 1.958305835723877, "learning_rate": 1.308780632536255e-11, "loss": 0.7338, "step": 28291 }, { "epoch": 0.9993244705040862, "grad_norm": 1.6781889200210571, "learning_rate": 1.181174571096566e-11, "loss": 0.7725, "step": 28292 }, { "epoch": 0.9993597923077941, "grad_norm": 1.8560945987701416, "learning_rate": 1.0601124001974329e-11, "loss": 0.7858, "step": 28293 }, { "epoch": 0.9993951141115021, "grad_norm": 1.7697376012802124, "learning_rate": 9.455941214486786e-12, "loss": 0.7692, "step": 28294 }, { "epoch": 0.99943043591521, "grad_norm": 1.78361177444458, "learning_rate": 8.376197363491046e-12, "loss": 0.7584, "step": 28295 }, { "epoch": 0.9994657577189179, "grad_norm": 1.7077921628952026, "learning_rate": 7.361892462864894e-12, "loss": 0.7307, "step": 28296 }, { "epoch": 0.9995010795226258, "grad_norm": 1.8152862787246704, "learning_rate": 6.41302652593101e-12, "loss": 0.7842, "step": 28297 }, { "epoch": 0.9995364013263337, "grad_norm": 1.5368543863296509, "learning_rate": 5.529599565456955e-12, "loss": 0.7365, "step": 28298 }, { "epoch": 0.9995717231300416, "grad_norm": 1.639922022819519, "learning_rate": 4.7116115925449625e-12, "loss": 0.7483, "step": 28299 }, { "epoch": 0.9996070449337495, "grad_norm": 1.758709192276001, "learning_rate": 3.95906261829726e-12, "loss": 0.7512, "step": 28300 }, { "epoch": 0.9996423667374574, "grad_norm": 1.6620196104049683, "learning_rate": 3.2719526515956333e-12, "loss": 0.7547, "step": 28301 }, { "epoch": 0.9996776885411653, "grad_norm": 0.9623702764511108, "learning_rate": 2.650281702987201e-12, "loss": 0.6037, "step": 28302 }, { "epoch": 0.9997130103448733, "grad_norm": 1.668992280960083, "learning_rate": 2.0940497791333e-12, "loss": 0.7546, "step": 28303 }, { "epoch": 0.9997483321485812, "grad_norm": 1.607353687286377, "learning_rate": 1.6032568883606047e-12, "loss": 0.7541, "step": 28304 }, { "epoch": 0.9997836539522891, "grad_norm": 1.9593231678009033, "learning_rate": 1.1779030367753407e-12, "loss": 0.7534, "step": 28305 }, { "epoch": 0.999818975755997, "grad_norm": 2.4579203128814697, "learning_rate": 8.179882299286235e-13, "loss": 0.7554, "step": 28306 }, { "epoch": 0.9998542975597049, "grad_norm": 1.6120463609695435, "learning_rate": 5.235124722613449e-13, "loss": 0.7519, "step": 28307 }, { "epoch": 0.9998896193634128, "grad_norm": 1.8549546003341675, "learning_rate": 2.944757676592858e-13, "loss": 0.7686, "step": 28308 }, { "epoch": 0.9999249411671207, "grad_norm": 1.9258867502212524, "learning_rate": 1.3087812000822652e-13, "loss": 0.7329, "step": 28309 }, { "epoch": 0.9999602629708286, "grad_norm": 0.9477213621139526, "learning_rate": 3.2719529863278756e-14, "loss": 0.5514, "step": 28310 }, { "epoch": 0.9999955847745365, "grad_norm": 1.2980470657348633, "learning_rate": 0.0, "loss": 0.6525, "step": 28311 }, { "epoch": 0.9999955847745365, "step": 28311, "total_flos": 7.494160118642744e+19, "train_loss": 0.8134805017288872, "train_runtime": 294764.1692, "train_samples_per_second": 24.588, "train_steps_per_second": 0.096 } ], "logging_steps": 1.0, "max_steps": 28311, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.494160118642744e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }